On Fri, Aug 24, 2012 at 02:53:01PM +0200, Michel Dänzer wrote: > From: Michel Dänzer <michel.daen...@amd.com> > > Fixes piglit fbo-blending-formats. > > Signed-off-by: Michel Dänzer <michel.daen...@amd.com> Reviewed-by: Tom Stellard <thomas.stell...@amd.com> > --- > src/gallium/drivers/radeon/SIInstructions.td | 4 +- > src/gallium/drivers/radeon/SIIntrinsics.td | 1 + > src/gallium/drivers/radeonsi/radeonsi_pipe.h | 3 +- > src/gallium/drivers/radeonsi/radeonsi_shader.c | 51 ++++++++++++++---- > src/gallium/drivers/radeonsi/si_state.c | 69 > +++++++++++++++++++++++- > src/gallium/drivers/radeonsi/si_state_draw.c | 4 -- > 6 files changed, 114 insertions(+), 18 deletions(-) > > diff --git a/src/gallium/drivers/radeon/SIInstructions.td > b/src/gallium/drivers/radeon/SIInstructions.td > index f09d604..3047321 100644 > --- a/src/gallium/drivers/radeon/SIInstructions.td > +++ b/src/gallium/drivers/radeon/SIInstructions.td > @@ -726,7 +726,9 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", > []>; > ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", > []>; > ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", > []>; > ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", > []>; > -////def V_CVT_PKRTZ_F16_F32 : VOP2_F16 <0x0000002f, "V_CVT_PKRTZ_F16_F32", > []>; > +defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", > + [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))] > +>; > ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>; > ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>; > def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>; > diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td > b/src/gallium/drivers/radeon/SIIntrinsics.td > index 6eadc94..b9544f1 100644 > --- a/src/gallium/drivers/radeon/SIIntrinsics.td > +++ b/src/gallium/drivers/radeon/SIIntrinsics.td > @@ -14,6 +14,7 @@ > > let TargetPrefix = "SI", isTarget = 1 in { > > + def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, > llvm_float_ty], [IntrNoMem]>; > def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, > llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, > llvm_float_ty], []>; > /* XXX: We may need a seperate intrinsic here for loading integer values */ > def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, > llvm_i32_ty], []>; > diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h > b/src/gallium/drivers/radeonsi/radeonsi_pipe.h > index 989bb49..099b509 100644 > --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h > +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h > @@ -134,7 +134,8 @@ struct r600_context { > unsigned saved_render_cond_mode; > /* shader information */ > unsigned sprite_coord_enable; > - boolean export_16bpc; > + unsigned export_16bpc; > + unsigned spi_shader_col_format; > unsigned alpha_ref; > boolean alpha_ref_dirty; > struct r600_textures_info vs_samplers; > diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c > b/src/gallium/drivers/radeonsi/radeonsi_shader.c > index fd614dd..98866c4 100644 > --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c > +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c > @@ -390,13 +390,47 @@ static void si_llvm_init_export_args(struct > lp_build_tgsi_context *bld_base, > unsigned compressed = 0; > unsigned chan; > > - for (chan = 0; chan < 4; chan++ ) { > - LLVMValueRef out_ptr = > - si_shader_ctx->radeon_bld.soa.outputs[index][chan]; > - /* +5 because the first output value will be > - * the 6th argument to the intrinsic. */ > - args[chan + 5] = LLVMBuildLoad(base->gallivm->builder, > - out_ptr, ""); > + if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { > + int cbuf = target - V_008DFC_SQ_EXP_MRT; > + > + if (cbuf >= 0 && cbuf < 8) { > + struct r600_context *rctx = si_shader_ctx->rctx; > + compressed = (rctx->export_16bpc >> cbuf) & 0x1; > + } > + } > + > + if (compressed) { > + /* Pixel shader needs to pack output values before export */ > + for (chan = 0; chan < 2; chan++ ) { > + LLVMValueRef *out_ptr = > + si_shader_ctx->radeon_bld.soa.outputs[index]; > + args[0] = LLVMBuildLoad(base->gallivm->builder, > + out_ptr[2 * chan], ""); > + args[1] = LLVMBuildLoad(base->gallivm->builder, > + out_ptr[2 * chan + 1], ""); > + args[chan + 5] = > + build_intrinsic(base->gallivm->builder, > + "llvm.SI.packf16", > + > LLVMInt32TypeInContext(base->gallivm->context), > + args, 2, > + LLVMReadNoneAttribute); > + args[chan + 7] = args[chan + 5]; > + } > + > + /* Set COMPR flag */ > + args[4] = uint->one; > + } else { > + for (chan = 0; chan < 4; chan++ ) { > + LLVMValueRef out_ptr = > + > si_shader_ctx->radeon_bld.soa.outputs[index][chan]; > + /* +5 because the first output value will be > + * the 6th argument to the intrinsic. */ > + args[chan + 5] = LLVMBuildLoad(base->gallivm->builder, > + out_ptr, ""); > + } > + > + /* Clear COMPR flag */ > + args[4] = uint->zero; > } > > /* XXX: This controls which components of the output > @@ -415,9 +449,6 @@ static void si_llvm_init_export_args(struct > lp_build_tgsi_context *bld_base, > /* Specify the target we are exporting */ > args[3] = lp_build_const_int32(base->gallivm, target); > > - /* Set COMPR flag */ > - args[4] = uint->zero; > - > /* XXX: We probably need to keep track of the output > * values, so we know what we are passing to the next > * stage. */ > diff --git a/src/gallium/drivers/radeonsi/si_state.c > b/src/gallium/drivers/radeonsi/si_state.c > index 36ac6bf..5f7f415 100644 > --- a/src/gallium/drivers/radeonsi/si_state.c > +++ b/src/gallium/drivers/radeonsi/si_state.c > @@ -996,6 +996,53 @@ static uint32_t si_colorformat_endian_swap(uint32_t > colorformat) > } > } > > +/* Returns the size in bits of the widest component of a CB format */ > +static unsigned si_colorformat_max_comp_size(uint32_t colorformat) > +{ > + switch(colorformat) { > + case V_028C70_COLOR_4_4_4_4: > + return 4; > + > + case V_028C70_COLOR_1_5_5_5: > + case V_028C70_COLOR_5_5_5_1: > + return 5; > + > + case V_028C70_COLOR_5_6_5: > + return 6; > + > + case V_028C70_COLOR_8: > + case V_028C70_COLOR_8_8: > + case V_028C70_COLOR_8_8_8_8: > + return 8; > + > + case V_028C70_COLOR_10_10_10_2: > + case V_028C70_COLOR_2_10_10_10: > + return 10; > + > + case V_028C70_COLOR_10_11_11: > + case V_028C70_COLOR_11_11_10: > + return 11; > + > + case V_028C70_COLOR_16: > + case V_028C70_COLOR_16_16: > + case V_028C70_COLOR_16_16_16_16: > + return 16; > + > + case V_028C70_COLOR_8_24: > + case V_028C70_COLOR_24_8: > + return 24; > + > + case V_028C70_COLOR_32: > + case V_028C70_COLOR_32_32: > + case V_028C70_COLOR_32_32_32_32: > + case V_028C70_COLOR_X24_8_32_FLOAT: > + return 32; > + } > + > + assert(!"Unknown maximum component size"); > + return 0; > +} > + > static uint32_t si_translate_dbformat(enum pipe_format format) > { > switch (format) { > @@ -1409,6 +1456,7 @@ static void si_cb(struct r600_context *rctx, struct > si_pm4_state *pm4, > const struct util_format_description *desc; > int i; > unsigned blend_clamp = 0, blend_bypass = 0; > + unsigned max_comp_size; > > surf = (struct r600_surface *)state->cbufs[cb]; > rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; > @@ -1549,6 +1597,17 @@ static void si_cb(struct r600_context *rctx, struct > si_pm4_state *pm4, > } > si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info); > si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, > color_attrib); > + > + /* Determine pixel shader export format */ > + max_comp_size = si_colorformat_max_comp_size(format); > + if (ntype == V_028C70_NUMBER_SRGB || > + ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) > && > + max_comp_size <= 10) || > + (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) { > + rctx->export_16bpc |= 1 << cb; > + rctx->spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << > (4 * cb); > + } else > + rctx->spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << > (4 * cb); > } > > static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4, > @@ -1667,9 +1726,12 @@ static void si_set_framebuffer_state(struct > pipe_context *ctx, > > /* build states */ > rctx->have_depth_fb = 0; > + rctx->export_16bpc = 0; > + rctx->spi_shader_col_format = 0; > for (int i = 0; i < state->nr_cbufs; i++) { > si_cb(rctx, pm4, state, i); > } > + assert(!(rctx->export_16bpc & ~0xff)); > si_db(rctx, pm4, state); > > shader_mask = 0; > @@ -1706,6 +1768,8 @@ static void si_set_framebuffer_state(struct > pipe_context *ctx, > si_pm4_set_reg(pm4, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000); > si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); > si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader_mask); > + si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, > + rctx->spi_shader_col_format); > si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0x00000000); > > si_pm4_set_state(rctx, framebuffer, pm4); > @@ -1727,9 +1791,10 @@ static INLINE unsigned si_shader_selector_key(struct > pipe_context *ctx, > if (sel->type == PIPE_SHADER_FRAGMENT) { > if (sel->fs_write_all) > key |= rctx->framebuffer.nr_cbufs; > + key |= rctx->export_16bpc << 4; > /*if (rctx->queued.named.rasterizer) > - key |= rctx->queued.named.rasterizer->flatshade << > 4;*/ > - /*key |== rctx->two_side << 5;*/ > + key |= rctx->queued.named.rasterizer->flatshade << > 12;*/ > + /*key |== rctx->two_side << 13;*/ > } > > return key; > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c > b/src/gallium/drivers/radeonsi/si_state_draw.c > index 95821dc..5f8e211 100644 > --- a/src/gallium/drivers/radeonsi/si_state_draw.c > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c > @@ -186,10 +186,6 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, > struct si_pipe_shader *s > /* XXX: Depends on Z buffer format? */ > si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, 0); > > - /* XXX: Depends on color buffer format? */ > - si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, > - > S_028714_COL0_EXPORT_FORMAT(V_028714_SPI_SHADER_32_ABGR)); > - > va = r600_resource_va(ctx->screen, (void *)shader->bo); > si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); > si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); > -- > 1.7.10.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev