Am Mittwoch, den 15.12.2010, 14:17 -0500 schrieb Jerome Glisse: > Can you resend with attaching the patch, i am getting corrupted patch > here dunno why. I attached the original patch and also an additional because Alex mentioned that my assumption that you can't writemask reduction operations is wrong.
Christian.
>From 4e4bdb62e275f8705561e482530aad83886a9330 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <deathsim...@vodafone.de> Date: Sun, 12 Dec 2010 15:13:39 +0100 Subject: [PATCH 1/2] r600g: Why all this fiddling with tgsi_helper_copy? tgsi_helper_copy is used on several occasions to copy a temporary result into the real destination register to emulate writemasks for OP3 and reduction operations. According to R600 ISA that's unnecessary. This patch fixes this use for MAD, CMP and DP4. --- src/gallium/drivers/r600/r600_shader.c | 62 +++++++++++++++++++++----------- 1 files changed, 41 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d645502..d1575d1 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1587,6 +1587,13 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; + int lasti = 0; + + for (i = 0; i < 4; i++) { + if (inst->Dst[0].Register.WriteMask & (1 << i)) { + lasti = i; + } + } r = tgsi_split_constant(ctx, r600_src); if (r) @@ -1594,26 +1601,32 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) r = tgsi_split_literal_constant(ctx, r600_src); if (r) return r; - /* do it in 2 step as op3 doesn't support writemask */ - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { alu.src[j] = r600_src[j]; alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } - alu.dst.sel = ctx->temp_reg; + + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; - if (i == 3) { + if (i == lasti) { alu.last = 1; } r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_dp(struct r600_shader_ctx *ctx) @@ -1636,7 +1649,13 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) alu.src[j] = r600_src[j]; alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } - alu.dst.sel = ctx->temp_reg; + if(inst->Dst[0].Register.WriteMask & (1 << i)) { + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + } else { + alu.dst.sel = ctx->temp_reg; + } alu.dst.chan = i; alu.dst.write = 1; /* handle some special cases */ @@ -1670,7 +1689,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) if (r) return r; } - return tgsi_helper_copy(ctx, inst); + return 0; } static int tgsi_tex(struct r600_shader_ctx *ctx) @@ -2000,8 +2019,14 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; - int use_temp = 0; int i, r; + int lasti = 0; + + for (i = 0; i < 4; i++) { + if (inst->Dst[0].Register.WriteMask & (1 << i)) { + lasti = i; + } + } r = tgsi_split_constant(ctx, r600_src); if (r) @@ -2010,10 +2035,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) if (r) return r; - if (inst->Dst[0].Register.WriteMask != 0xf) - use_temp = 1; + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); alu.src[0] = r600_src[0]; @@ -2025,24 +2050,19 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) alu.src[2] = r600_src[1]; alu.src[2].chan = tgsi_chan(&inst->Src[1], i); - if (use_temp) - alu.dst.sel = ctx->temp_reg; - else { - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - } + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; - if (i == 3) + if (i == lasti) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; } - if (use_temp) - return tgsi_helper_copy(ctx, inst); return 0; } -- 1.7.1
>From 9b2a3a5cf45473122ea16176218635b43bd92d44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <deathsim...@vodafone.de> Date: Tue, 14 Dec 2010 19:32:08 +0100 Subject: [PATCH 2/2] r600g: DP4 also supports writemasking --- src/gallium/drivers/r600/r600_shader.c | 14 ++++++-------- 1 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d1575d1..0ad857f 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1649,15 +1649,13 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) alu.src[j] = r600_src[j]; alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } - if(inst->Dst[0].Register.WriteMask & (1 << i)) { - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - } else { - alu.dst.sel = ctx->temp_reg; - } + + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.chan = i; - alu.dst.write = 1; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_DP2: -- 1.7.1
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev