Sending from a mobile, pardon my terseness. ~ C. On Aug 23, 2011 2:42 PM, "Tom Stellard" <tstel...@gmail.com> wrote: > According to the GLSL spec, the implementor can decide which way to round > when the fraction is .5. The r300 compiler will round down, so we can use > CND and save an instruction. > --- > > MLAA should work on r300g (r500 only) with this patch. I've tested > with the kasanen-post-process-v2 branch and it looks OK to me, but it > would be nice to have a second opinion. > > I was testing with: pp_jimenezmlaa=8 glxgears > > src/gallium/drivers/r300/compiler/radeon_opcodes.c | 7 +++ > src/gallium/drivers/r300/compiler/radeon_opcodes.h | 3 + > .../drivers/r300/compiler/radeon_program_alu.c | 54 ++++++++++++++++++++ > src/gallium/drivers/r300/r300_tgsi_to_rc.c | 2 +- > 4 files changed, 65 insertions(+), 1 deletions(-) > > diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c > index afd78ad..527db9a 100644 > --- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c > +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c > @@ -246,6 +246,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { > .IsStandardScalar = 1 > }, > { > + .Opcode = RC_OPCODE_ROUND, > + .Name = "ROUND", > + .NumSrcRegs = 1, > + .HasDstReg = 1, > + .IsComponentwise = 1 > + }, > + { > .Opcode = RC_OPCODE_RSQ, > .Name = "RSQ", > .NumSrcRegs = 1, > diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h > index b586882..968dc7b 100644 > --- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h > +++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h > @@ -133,6 +133,9 @@ typedef enum { > /** scalar instruction: dst = 1 / src0.x */ > RC_OPCODE_RCP, > > + /** vec4 instruction: dst.c = frc(src0.c) > 0.5 ? ceil(src0.c) : floor(src0.c) */ > + RC_OPCODE_ROUND, > + > /** scalar instruction: dst = 1 / sqrt(src0.x) */ > RC_OPCODE_RSQ, > > diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c > index e273bc4..0bfd2dc 100644 > --- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c > +++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c > @@ -104,6 +104,13 @@ static const struct rc_src_register builtin_one = { > .Index = 0, > .Swizzle = RC_SWIZZLE_1111 > }; > + > +static const struct rc_src_register builtin_half = { > + .File = RC_FILE_NONE, > + .Index = 0, > + .Swizzle = RC_SWIZZLE_HHHH > +}; > + > static const struct rc_src_register srcreg_undefined = { > .File = RC_FILE_NONE, > .Index = 0, > @@ -416,6 +423,52 @@ static void transform_POW(struct radeon_compiler* c, > rc_remove_instruction(inst); > } > > +/* dst = ROUND(src) : > + * frac = FRC(src) > + * low = src - frac > + * high = low + 1 > + * dst = CND high, low, frac > + * > + * According to the GLSL spec, the implementor can decide which way to round > + * when the fraction is .5. In this case we round down, so we can use > + * CND and save an instruction. > + * > + * The optimizer should reduce this sequence to 3 instructions using > + * presubtract. > + */ > +static void transform_ROUND(struct radeon_compiler* c, > + struct rc_instruction* inst) > +{ > + unsigned int mask = inst->U.I.DstReg.WriteMask; > + unsigned int frac_index, low_index, high_index; > + struct rc_dst_register frac_dst, low_dst, high_dst; > + struct rc_src_register frac_src, low_src, high_src; > + > + /* frac = FRC(src) */ > + frac_index = rc_find_free_temporary(c); > + frac_dst = dstregtmpmask(frac_index, mask); > + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, inst->U.I.SrcReg[0]); > + frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index); > + > + /* low = src - frc */ > + low_index = rc_find_free_temporary(c); > + low_dst = dstregtmpmask(low_index, mask); > + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, low_dst, > + negate(inst->U.I.SrcReg[0]), frac_src); > + low_src = srcreg(RC_FILE_TEMPORARY, low_dst.Index); > + > + /* high = low + 1 */ > + high_index = rc_find_free_temporary(c); > + high_dst = dstregtmpmask(high_index, mask); > + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, high_dst, low_src, builtin_one); > + high_src = srcreg(RC_FILE_TEMPORARY, high_dst.Index); > + > + /* dst = CND high, low, frac */ > + emit3(c, inst->Prev, RC_OPCODE_CND, 0, inst->U.I.DstReg, > + high_src, low_src, frac_src); > + rc_remove_instruction(inst); > +} > + > static void transform_RSQ(struct radeon_compiler* c, > struct rc_instruction* inst) > { > @@ -599,6 +652,7 @@ int radeonTransformALU( > case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; > case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; > case RC_OPCODE_POW: transform_POW(c, inst); return 1; > + case RC_OPCODE_ROUND: transform_ROUND(c, inst); return 1; > case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; > case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; > case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; > diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c > index 07a3f3c..4cb08b5 100644 > --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c > +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c > @@ -57,7 +57,7 @@ static unsigned translate_opcode(unsigned opcode) > case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; > case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; > case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; > - /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */ > + case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; > case TGSI_OPCODE_EX2: return RC_OPCODE_EX2; > case TGSI_OPCODE_LG2: return RC_OPCODE_LG2; > case TGSI_OPCODE_POW: return RC_OPCODE_POW; > -- > 1.7.3.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev