Am 02.02.2018 um 05:56 schrieb Dave Airlie: > From: Dave Airlie <airl...@redhat.com> > > Doing a straight uint/int->fp32->fp64 conversion causes > some precision issues, Roland suggested splitting the > integer into two portions and doing two separate > int->fp32->fp64 conversions then adding the results. > > This passes the tests in CTS and piglit. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/gallium/drivers/r600/r600_shader.c | 118 > +++++++++++++++++++++++++-------- > 1 file changed, 90 insertions(+), 28 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 13aa681049..22f2736b03 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -4490,44 +4490,106 @@ static int egcm_int_to_double(struct r600_shader_ctx > *ctx) > { > struct tgsi_full_instruction *inst = > &ctx->parse.FullToken.FullInstruction; > struct r600_bytecode_alu alu; > - int i, r; > - int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); > + int i, c, r; > + int write_mask = inst->Dst[0].Register.WriteMask; > + int temp_reg = r600_get_temp(ctx); > > assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D || > inst->Instruction.Opcode == TGSI_OPCODE_U2D); > > - for (i = 0; i <= (lasti+1)/2; i++) { > - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); > - alu.op = ctx->inst_info->op; > - > - r600_bytecode_src(&alu.src[0], &ctx->src[0], i); > - alu.dst.sel = ctx->temp_reg; > - alu.dst.chan = i; > - alu.dst.write = 1; > - alu.last = 1; > + for (c = 0; c < 2; c++) { > + int dchan = c * 2; > + if (write_mask & (0x3 << dchan)) { > + /* split into 24-bit int and 8-bit int */ > + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); > + alu.op = ALU_OP2_AND_INT; > + alu.dst.sel = temp_reg; > + alu.dst.chan = dchan; > + r600_bytecode_src(&alu.src[0], &ctx->src[0], c); > + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; > + alu.src[1].value = 0xffffff00; > + alu.dst.write = 1; > 
+ r = r600_bytecode_add_alu(ctx->bc, &alu); > + if (r) > + return r; > > - r = r600_bytecode_add_alu(ctx->bc, &alu); > - if (r) > - return r; > + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); > + alu.op = ALU_OP2_AND_INT; > + alu.dst.sel = temp_reg; > + alu.dst.chan = dchan + 1; > + r600_bytecode_src(&alu.src[0], &ctx->src[0], c); > + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; > + alu.src[1].value = 0xff; > + alu.dst.write = 1; > + alu.last = 1; > + r = r600_bytecode_add_alu(ctx->bc, &alu); > + if (r) > + return r; > + } > } > > - for (i = 0; i <= lasti; i++) { > - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); > - alu.op = ALU_OP1_FLT32_TO_FLT64; > + for (c = 0; c < 2; c++) { > + int dchan = c * 2; > + if (write_mask & (0x3 << dchan)) { > + for (i = dchan; i <= dchan + 1; i++) { > + memset(&alu, 0, sizeof(struct > r600_bytecode_alu)); > + alu.op = i == dchan ? ctx->inst_info->op : > ALU_OP1_UINT_TO_FLT; > > - alu.src[0].chan = i/2; > - if (i%2 == 0) > - alu.src[0].sel = ctx->temp_reg; > - else { > - alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; > - alu.src[0].value = 0x0; > + alu.src[0].sel = temp_reg; > + alu.src[0].chan = i; > + alu.dst.sel = temp_reg; > + alu.dst.chan = i; > + alu.dst.write = 1; > + alu.last = i == dchan + 1; > + > + r = r600_bytecode_add_alu(ctx->bc, &alu); > + if (r) > + return r; > + } > } That'll still work on Evergreen (Cypress), where UINT_TO_FLT is scalar, right? I just realized that for the low 8 bits you could actually skip the masking and use UBYTE0_FLT instead, if that instruction does what the docs say :-). I guess that won't be much of an improvement on Cayman, but it might shave off another instruction or two on Cypress (as this one is a vector instruction)... In any case,
Reviewed-by: Roland Scheidegger <srol...@vmware.com> > - tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); > - alu.last = i == lasti; > + } > > - r = r600_bytecode_add_alu(ctx->bc, &alu); > - if (r) > - return r; > + for (c = 0; c < 2; c++) { > + int dchan = c * 2; > + if (write_mask & (0x3 << dchan)) { > + for (i = 0; i < 4; i++) { > + memset(&alu, 0, sizeof(struct > r600_bytecode_alu)); > + alu.op = ALU_OP1_FLT32_TO_FLT64; > + > + alu.src[0].chan = dchan + (i / 2); > + if (i == 0 || i == 2) > + alu.src[0].sel = temp_reg; > + else { > + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; > + alu.src[0].value = 0x0; > + } > + alu.dst.sel = ctx->temp_reg; > + alu.dst.chan = i; > + alu.last = i == 3; > + alu.dst.write = 1; > + > + r = r600_bytecode_add_alu(ctx->bc, &alu); > + if (r) > + return r; > + } > + > + for (i = 0; i <= 1; i++) { > + memset(&alu, 0, sizeof(struct > r600_bytecode_alu)); > + alu.op = ALU_OP2_ADD_64; > + > + alu.src[0].chan = fp64_switch(i); > + alu.src[0].sel = ctx->temp_reg; > + > + alu.src[1].chan = fp64_switch(i + 2); > + alu.src[1].sel = ctx->temp_reg; > + tgsi_dst(ctx, &inst->Dst[0], dchan + i, > &alu.dst); > + alu.last = i == 1; > + > + r = r600_bytecode_add_alu(ctx->bc, &alu); > + if (r) > + return r; > + } > + } > } > > return 0; > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev