On Fri, 20 Feb 2015 01:54:03 +0100, Dave Airlie <airl...@gmail.com> wrote:

From: Dave Airlie <airl...@redhat.com>

Only a subset of the AMD GPUs supported by r600g support doubles.
CAYMAN and CYPRESS are probably all we'll try to support; however,
I don't have a CYPRESS, so ignore that for now.

This disables SB support for doubles, as we think we need to
make the scheduler smarter to introduce delay slots.

Signed-off-by: Dave Airlie <airl...@redhat.com>
---
 src/gallium/drivers/r600/r600_asm.c    |  14 ++
 src/gallium/drivers/r600/r600_asm.h    |  15 ++
 src/gallium/drivers/r600/r600_isa.h    |   8 +-
 src/gallium/drivers/r600/r600_pipe.c   |   2 +
src/gallium/drivers/r600/r600_shader.c | 389 ++++++++++++++++++++++++++++++++-
 src/gallium/drivers/r600/r600_shader.h |   2 +
 6 files changed, 424 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 79e7f74..dc26b63 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -252,6 +252,12 @@ static int alu_uses_rel(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
        return 0;
 }
+static int is_alu_64bit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+{
+       const struct alu_op_info *op = r600_isa_alu(alu->op);
+       return (op->flags & AF_64);
+}
+
static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
        unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
@@ -576,6 +582,12 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
        for (i = 0; i < max_slots; ++i) {
if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && !prev[i]->dst.rel) {
+
+                       if (is_alu_64bit_inst(bc, prev[i])) {
+                               gpr[i] = -1;
+                               continue;
+                       }
+
                        gpr[i] = prev[i]->dst.sel;
                        /* cube writes more than PV.X */
                        if (is_alu_reduction_inst(bc, prev[i]))
@@ -591,6 +603,8 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
                if(!alu)
                        continue;
+               if (is_alu_64bit_inst(bc, alu))
+                       continue;
                num_src = r600_bytecode_get_num_operands(bc, alu);
                for (src = 0; src < num_src; ++src) {
                        if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index e37d926..7b2734c 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -279,4 +279,19 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
                           unsigned *num_format, unsigned *format_comp, 
unsigned *endian);
+
+static INLINE int fp64_switch(int i)
+{

Rather hard to decipher what this function does. How about fp64_gpr_channel_swizzle?

+       switch (i) {
+       case 0:
+               return 1;
+       case 1:
+               return 0;
+       case 2:
+               return 3;
+       case 3:
+               return 2;
+       }
+       return 0;
+}
 #endif
diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
index ec3f702..3cc135e 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -339,11 +339,11 @@ static const struct alu_op_info alu_op_table[] = {

It might be an idea to fix up the table entries for MULADD for R6xx/R7xx; they are 4-slot too.

FREXP_64 is a 4 slot instruction, not 2.

{"PRED_SETGT_64", 2, { 0x7C, 0xC7 },{ AF_V, AF_V, AF_V, AF_V}, AF_PRED | AF_CC_GT | AF_64 }, {"PRED_SETE_64", 2, { 0x7D, 0xC8 },{ AF_V, AF_V, AF_V, AF_V}, AF_PRED | AF_CC_E | AF_64 }, {"PRED_SETGE_64", 2, { 0x7E, 0xC9 },{ AF_V, AF_V, AF_V, AF_V}, AF_PRED | AF_CC_GE | AF_64 }, - {"MUL_64", 2, { 0x1B, 0xCA },{ AF_V, AF_V, AF_V, AF_V}, AF_64 },
+               {"MUL_64",                    2, { 0x1B, 0xCA },{   AF_V,  AF_V,

This is a 4-slot instruction on r600/r700/evergreen as well, so you might as well fix the table entries while touching this.

AF_V,  AF_4V}, AF_64 },
{"ADD_64", 2, { 0x17, 0xCB },{ AF_V, AF_V, AF_V, AF_V}, AF_64 }, {"MOVA_INT", 1, { 0x18, 0xCC },{ AF_V, AF_V, AF_V, AF_V}, AF_MOVA }, - {"FLT64_TO_FLT32", 1, { 0x1C, 0xCD },{ AF_V, AF_V, AF_V, AF_V}, 0 }, - {"FLT32_TO_FLT64", 1, { 0x1D, 0xCE },{ AF_V, AF_V, AF_V, AF_V}, 0 }, + {"FLT64_TO_FLT32", 1, { 0x1C, 0xCD },{ AF_V, AF_V, AF_V, AF_V}, AF_64 }, + {"FLT32_TO_FLT64", 1, { 0x1D, 0xCE },{ AF_V, AF_V, AF_V, AF_V}, AF_64 }, {"SAD_ACCUM_PREV_UINT", 2, { -1, 0xCF },{ 0, 0, AF_V, AF_V}, AF_UINT_DST | AF_PREV_NEXT }, {"DOT", 2, { -1, 0xD0 },{ 0, 0, AF_V, AF_V}, AF_PREV_NEXT }, {"MUL_PREV", 1, { -1, 0xD1 },{ 0, 0, AF_V, AF_V}, AF_PREV_INTERLEAVE },
@@ -369,7 +369,7 @@ static const struct alu_op_info alu_op_table[] = {
{"FMA", 3, { -1, 0x07 },{ 0, 0, AF_V, AF_V}, 0 }, {"MULADD_INT24", 3, { -1, 0x08 },{ 0, 0, 0, AF_V}, AF_INT_DST | AF_24 }, {"CNDNE_64", 3, { -1, 0x09 },{ 0, 0, AF_V, AF_V}, AF_CMOV | AF_64 }, - {"FMA_64", 3, { -1, 0x0A },{ 0, 0, AF_V, AF_V}, AF_64 }, + {"FMA_64", 3, { -1, 0x0A },{ 0, 0, AF_V, AF_4V}, AF_64 },

4 slot also on evergreen

{"LERP_UINT", 3, { -1, 0x0B },{ 0, 0, AF_V, AF_V}, AF_UINT_DST }, {"BIT_ALIGN_INT", 3, { -1, 0x0C },{ 0, 0, AF_V, AF_V}, AF_INT_DST }, {"BYTE_ALIGN_INT", 3, { -1, 0x0D },{ 0, 0, AF_V, AF_V}, AF_INT_DST }, diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index a4b7b66..9d9f1d4 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -488,6 +488,8 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
                        return PIPE_SHADER_IR_TGSI;
                }
        case PIPE_SHADER_CAP_DOUBLES:
+               if (rscreen->b.family == CHIP_CAYMAN)
+                       return 1;
                return 0;
        }
        return 0;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 77c9909..34c4e14 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -47,7 +47,7 @@ MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
 These 8xx t-slot only opcodes become vector ops, with all four
 slots expecting the arguments on sources a and b. Result is
 broadcast to all channels.
-MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
+MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64

Drop this hunk; it's 4-slot on prior chips too, though the documentation is a bit contradictory in claiming it's a t-slot instruction, which it cannot possibly be due to the 4 inputs it takes...

 These 8xx t-slot only opcodes become vector ops in the z, y, and
 x slots.
EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
@@ -163,6 +163,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
        use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
/* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */
        use_sb &= !shader->shader.uses_index_registers;
+       /* disable SB for shaders using doubles */

Note in the comment that it's a scheduling issue.

+       use_sb &= !shader->shader.uses_doubles;
        /* Check if the bytecode has already been built.  When using the llvm
         * backend, r600_shader_from_tgsi() will take care of building the
@@ -339,7 +341,7 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
        int j;
-       if (i->Instruction.NumDstRegs > 1) {
+ if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
                R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
                return -EINVAL;
        }
@@ -1827,6 +1829,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        ctx.tokens = tokens;
        tgsi_scan_shader(tokens, &ctx.info);
        shader->indirect_files = ctx.info.indirect_files;
+
+       shader->uses_doubles = ctx.info.uses_doubles;
+
        indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
        tgsi_parse_init(&ctx.parse, tokens);
        ctx.type = ctx.parse.FullHeader.Processor.Processor;
@@ -2608,6 +2613,168 @@ static int tgsi_last_instruction(unsigned writemask)
        return lasti;
 }
+
+
+static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       unsigned write_mask = inst->Dst[0].Register.WriteMask;
+       struct r600_bytecode_alu alu;
+       int i, j, r, lasti = tgsi_last_instruction(write_mask);

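Dead assignment: lasti is recomputed from the adjusted write_mask right before the loop, so this initializer is never used.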

+       int use_tmp = 0;
+
+       if (singledest) {
+               switch (write_mask) {
+               case 0x1:
+                       write_mask = 0x3;
+                       break;
+               case 0x2:
+                       use_tmp = 1;
+                       write_mask = 0x3;
+                       break;
+               case 0x4:
+                       write_mask = 0xc;
+                       break;
+               case 0x8:
+                       write_mask = 0xc;
+                       use_tmp = 3;
+                       break;
+               }
+       }
+
+       lasti = tgsi_last_instruction(write_mask);
+       for (i = 0; i <= lasti; i++) {
+
+               if (!(write_mask & (1 << i)))
+                       continue;
+
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+               if (singledest) {
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                       if (use_tmp) {
+                               alu.dst.sel = ctx->temp_reg;
+                               alu.dst.chan = i;
+                               alu.dst.write = 1;
+                       }
+                       if (i == 1 || i == 3)
+                               alu.dst.write = 0;
+               } else
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+               alu.op = ctx->inst_info->op;
+
+               if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_DABS) {
+                       r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+               } else if (!swap) {
+                       for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                               r600_bytecode_src(&alu.src[j], &ctx->src[j], 
fp64_switch(i));
+                       }
+               } else {
+                       r600_bytecode_src(&alu.src[0], &ctx->src[1], 
fp64_switch(i));
+                       r600_bytecode_src(&alu.src[1], &ctx->src[0], 
fp64_switch(i));
+               }
+
+               /* handle some special cases */
+               if (i == 1 || i == 3) {
+                       switch (ctx->inst_info->tgsi_opcode) {
+                       case TGSI_OPCODE_SUB:
+                               r600_bytecode_src_toggle_neg(&alu.src[1]);
+                               break;
+                       case TGSI_OPCODE_DABS:
+                               r600_bytecode_src_set_abs(&alu.src[0]);
+                               break;
+                       default:
+                               break;
+                       }
+               }
+               if (i == lasti) {
+                       alu.last = 1;
+               }
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       if (use_tmp) {
+               write_mask = inst->Dst[0].Register.WriteMask;
+
+               /* move result from temp to dst */
+               for (i = 0; i <= lasti; i++) {
+                       if (!(write_mask & (1 << i)))
+                               continue;
+
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ALU_OP1_MOV;
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                       alu.src[0].sel = ctx->temp_reg;
+                       alu.src[0].chan = use_tmp - 1;
+                       alu.last = (i == lasti);
+
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+       }
+       return 0;
+}
+
+static int tgsi_op2_64(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       unsigned write_mask = inst->Dst[0].Register.WriteMask;
+       /* confirm writemaskiing */

assiiert? (Typo in the comment: "writemaskiing" should be "writemasking".)

+       if ((write_mask & 0x3) != 0x3 &&
+           (write_mask & 0xc) != 0xc) {
+               fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", 
write_mask);
+               return -1;
+       }
+       return tgsi_op2_64_params(ctx, false, false);
+}
+
+static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx)
+{
+       return tgsi_op2_64_params(ctx, true, false);
+}
+
+static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx)

shortage of string? tgsi_op2_64_single_dest_swap?

+{
+       return tgsi_op2_64_params(ctx, true, true);
+}
+
+static int tgsi_op3_64(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       int i, j, r;
+       int lasti = 3;
+       int tmp = r600_get_temp(ctx);
+
+       for (i = 0; i < lasti + 1; i++) {
+
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ctx->inst_info->op;
+               for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                       r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 
: 1);

The i == 3 special case might deserve a comment.

+               }
+
+               if (inst->Dst[0].Register.WriteMask & (1 << i))
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               else
+                       alu.dst.sel = tmp;
+
+               alu.dst.chan = i;
+               alu.is_op3 = 1;
+               if (i == lasti) {
+                       alu.last = 1;
+               }
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+}
+
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
 {
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -2724,6 +2891,152 @@ static int tgsi_ineg(struct r600_shader_ctx *ctx)
}
+static int tgsi_dneg(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       int i, r;
+       int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+       for (i = 0; i < lasti + 1; i++) {
+
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                       continue;
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+
+               r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+
+               if (i == 1 || i == 3)
+                       r600_bytecode_src_toggle_neg(&alu.src[0]);
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+               if (i == lasti) {
+                       alu.last = 1;
+               }
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+
+}
+
+static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       unsigned write_mask = inst->Dst[0].Register.WriteMask;
+       int i, j, r;
+       int firsti = write_mask == 0xc ? 2 : 0;
+
+       for (i = 0; i <= 3; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ctx->inst_info->op;
+
+               alu.dst.sel = ctx->temp_reg;
+               alu.dst.chan = i;
+               alu.dst.write = 1;
+               for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                       r600_bytecode_src(&alu.src[j], &ctx->src[j], 
fp64_switch(i));
+               }
+
+               if (i == 3)
+                       alu.last = 1;
+
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       /* MOV first two channels to writemask dst0 */
+       for (i = 0; i <= 1; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+               alu.src[0].chan = i + 2;
+               alu.src[0].sel = ctx->temp_reg;
+
+               tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
+               alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) 
& 1;
+               alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       for (i = 0; i <= 3; i++) {
+               if (inst->Dst[1].Register.WriteMask & (1 << i)) {
+                       /* MOV third channels to writemask dst1 */
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ALU_OP1_MOV;
+                       alu.src[0].chan = 1;
+                       alu.src[0].sel = ctx->temp_reg;
+
+                       tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst);
+                       alu.last = 1;
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+                       break;
+               }
+       }
+       return 0;
+}
+
+static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       int i, r;
+       struct r600_bytecode_alu alu;
+       int last_slot = 3;
+       int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+       int t1 = ctx->temp_reg;
+
+       /* these have to write the result to X/Y by the looks of it */
+       for (i = 0 ; i < last_slot; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ctx->inst_info->op;
+
+               /* should only be one src regs */
+               assert (inst->Instruction.NumSrcRegs == 1);
+
+               r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
+               r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);
+
+               /* RSQ should take the absolute value of src */
+               if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_DRSQ ||
+                   ctx->inst_info->tgsi_opcode == TGSI_OPCODE_DSQRT) {
+                       r600_bytecode_src_set_abs(&alu.src[1]);
+               }
+               alu.dst.sel = t1;
+               alu.dst.chan = i;
+               alu.dst.write = (i == 0 || i == 1);
+
+               if (i == last_slot - 1)
+                       alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       for (i = 0 ; i <= lasti; i++) {
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                       continue;
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+               alu.src[0].sel = t1;
+               alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1;
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.dst.write = 1;
+               if (i == lasti)
+                       alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+}
+
 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
 {
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -2802,6 +3115,55 @@ static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
        return 0;
 }
+
+static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       int i, j, k, r;
+       struct r600_bytecode_alu alu;
+       int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+       int t1 = ctx->temp_reg;
+
+       for (k = 0; k <= 2; k++) {
+               if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
+                       continue;
+
+               for (i = 0; i < 4; i++) {
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ctx->inst_info->op;
+                       for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));;
+                       }
+                       alu.dst.sel = t1;
+                       alu.dst.chan = i;
+                       alu.dst.write = 1;
+                       if (i == 3)
+                               alu.last = 1;
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+       }
+
+       for (i = 0; i <= lasti; i++) {
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                       continue;
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+               alu.src[0].sel = t1;
+               alu.src[0].chan = i;
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.dst.write = 1;
+               if (i == lasti)
+                       alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       return 0;
+}
+
 /*
  * r600 - trunc to -PI..PI range
  * r700 - normalize by dividing by 2PI
@@ -7869,5 +8231,28 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_INTERP_CENTROID,   0, ALU_OP0_NOP, tgsi_interp_egcm},
        {TGSI_OPCODE_INTERP_SAMPLE,             0, ALU_OP0_NOP, 
tgsi_interp_egcm},
        {TGSI_OPCODE_INTF2DERP_OFFSET,          0, ALU_OP0_NOP, 
tgsi_interp_egcm},
+       {TGSI_OPCODE_F2D,       0, ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
+       {TGSI_OPCODE_D2F,       0, ALU_OP1_FLT64_TO_FLT32, 
tgsi_op2_64_single_dest},
+       {TGSI_OPCODE_DABS,      0, ALU_OP1_MOV, tgsi_op2_64},
+       {TGSI_OPCODE_DNEG,      0, ALU_OP2_ADD_64, tgsi_dneg},
+       {TGSI_OPCODE_DADD,      0, ALU_OP2_ADD_64, tgsi_op2_64},
+       {TGSI_OPCODE_DMUL,      0, ALU_OP2_MUL_64, cayman_mul_double_instr},
+       {TGSI_OPCODE_DMAX,      0, ALU_OP2_MAX_64, tgsi_op2_64},
+       {TGSI_OPCODE_DMIN,      0, ALU_OP2_MIN_64, tgsi_op2_64},
+       {TGSI_OPCODE_DSLT,      0, ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
+       {TGSI_OPCODE_DSGE,      0, ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
+       {TGSI_OPCODE_DSEQ,      0, ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
+       {TGSI_OPCODE_DSNE,      0, ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
+       {TGSI_OPCODE_DRCP,      0, ALU_OP2_RECIP_64, cayman_emit_double_instr},
+       {TGSI_OPCODE_DSQRT,     0, ALU_OP2_SQRT_64, cayman_emit_double_instr},
+       {TGSI_OPCODE_DMAD,      0, ALU_OP3_FMA_64, tgsi_op3_64},
+       {TGSI_OPCODE_DFRAC,     0, ALU_OP1_FRACT_64, tgsi_op2_64},
+       {TGSI_OPCODE_DLDEXP,    0, ALU_OP2_LDEXP_64, tgsi_op2_64},
+       {TGSI_OPCODE_DFRACEXP,  0, ALU_OP1_FREXP_64, tgsi_dfracexp},
+       {TGSI_OPCODE_D2I,       0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_I2D,       0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_D2U,       0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_U2D,       0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_DRSQ,      0, ALU_OP2_RECIPSQRT_64, 
cayman_emit_double_instr},
        {TGSI_OPCODE_LAST,      0, ALU_OP0_NOP, tgsi_unsupported},
 };
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index b2559e9..a10004c 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -87,6 +87,8 @@ struct r600_shader {
        unsigned                vs_as_gs_a;
        unsigned                ps_prim_id_input;
        struct r600_shader_array * arrays;
+
+       boolean                 uses_doubles;
 };
struct r600_shader_key {

With above nits fixed,
Reviewed-by: Glenn Kennard <glenn.kenn...@gmail.com>
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to