I did today run these patches against piglit glsl tests and there was
no regressions. I did go testing write the attached patch on top of
this set, it change MULs with explicit accumulator write into implicit
accumulator write. I guess what Eric said mean something like the
attached patch is needed?

In the patch I also did look into MACH command I did comment about but
was not certain what is the easiest way to see the effect. I started
looking at the changes in generated opcodes I might cause by setting
"INTEL_DEBUG=fs", and do comparison on the files I get, is this good
way to do it or is there some more convenient way?

On Thu, Apr 10, 2014 at 3:48 PM, Juha-Pekka Heikkilä
<juhapekka.heikk...@gmail.com> wrote:
> Hi Matt,
>
> the changed set looks good to me, I did side by side comparison on
> what had changed but did not try to run it today. I realized
> immediately when seeing your comment I had not understood to consider
> the "WAR" vs. "RAW" comments in the scheduler. I was thinking when I
> made the latest set the first patch had grown a bit big but did not go
> braking it yet into pieces.
>
> On Wed, Apr 9, 2014 at 11:58 PM, Matt Turner <matts...@gmail.com> wrote:
>> On Fri, Apr 4, 2014 at 6:51 AM, Juha-Pekka Heikkila
>> <juhapekka.heikk...@gmail.com> wrote:
>>> diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
>>> index a951459..92f82fd 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
>>> @@ -758,6 +758,7 @@ fs_instruction_scheduler::calculate_deps()
>>>     schedule_node *last_fixed_grf_write = NULL;
>>>     int reg_width = v->dispatch_width / 8;
>>>
>>> +   schedule_node *last_accumulator_write = NULL;
>>>     /* The last instruction always needs to still be the last
>>>      * instruction.  Either it's flow control (IF, ELSE, ENDIF, DO,
>>>      * WHILE) and scheduling other things after it would disturb the
>>> @@ -822,6 +823,10 @@ fs_instruction_scheduler::calculate_deps()
>>
>> The line before this was
>>   if (inst->reads_flag()) {
>>>          add_dep(last_conditional_mod[inst->flag_subreg], n);
>>>        }
>>>
>>> +      if (inst->writes_accumulator || inst->dst.is_accumulator()) {
>>> +         add_dep(last_accumulator_write, n);
>>> +      }
>>
>> But we're checking if we're writing the accumulator here, instead of reading 
>> it.
>>
>> We're also not giving the scheduler any benefits from it's new
>> knowledge of accumulator dependencies, because we're still calling
>> add_barrier_deps() above when we don't recognize the destination. I
>> hope you don't mind, but I split the is_accumulator() additions into a
>> separate patch, fixed up the scheduler hunks and sent the revised
>> patch. Let me know if it looks right to you.
From 7dbb363436ff4b70aea6069df37c5bbe393b9973 Mon Sep 17 00:00:00 2001
From: Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com>
Date: Fri, 11 Apr 2014 14:52:55 +0300
Subject: [PATCH] i965: Change gpu opdoces which implicitly write accumulator
 from explicitly writing accumulator

With the new accumulator backend support change opcode emits which
rely on accumulator to write accumulator implicitly.

Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp           | 17 ++++++-----------
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   |  8 ++++----
 src/mesa/drivers/dri/i965/brw_vec4.cpp         | 16 +++++++++-------
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  8 ++++----
 4 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 4331ee5..add6672 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2132,17 +2132,12 @@ fs_visitor::dead_code_eliminate()
             }
          }
 
-         if (dead) {
-            /* Don't dead code eliminate instructions that write to the
-             * accumulator as a side-effect. Instead just set the destination
-             * to the null register to free it.
-             */
-            if (inst->writes_accumulator) {
-               inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
-            } else {
-               inst->remove();
-               progress = true;
-            }
+         /* Don't dead code eliminate instructions that write to the
+          * accumulator.
+          */
+         if (dead && !inst->writes_accumulator) {
+            inst->remove();
+            progress = true;
          }
       }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2aa3acd..9897725 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -473,7 +473,8 @@ fs_visitor::visit(ir_expression *ir)
 
             struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
 
-            emit(MUL(acc, op[0], op[1]));
+            fs_inst *mul = emit(MUL(reg_null_d, op[0], op[1]));
+            mul->writes_accumulator = true;
             emit(MACH(reg_null_d, op[0], op[1]));
             emit(MOV(this->result, fs_reg(acc)));
          }
@@ -485,9 +486,8 @@ fs_visitor::visit(ir_expression *ir)
       if (brw->gen >= 7)
          no16("SIMD16 explicit accumulator operands unsupported\n");
 
-      struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
-
-      emit(MUL(acc, op[0], op[1]));
+      fs_inst *mul = emit(MUL(reg_null_d, op[0], op[1]));
+      mul->writes_accumulator = true;
       emit(MACH(this->result, op[0], op[1]));
       break;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index acce0377..a1753ff 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -347,16 +347,18 @@ try_eliminate_instruction(vec4_instruction *inst, int new_writemask,
 
    if (new_writemask == 0) {
       /* Don't dead code eliminate instructions that write to the
-       * accumulator as a side-effect. Instead just set the destination
-       * to the null register to free it.
+       * accumulator.
        */
-      if (inst->writes_accumulator || inst->writes_flag()) {
-         inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
-      } else {
-         inst->remove();
+      if (!inst->writes_accumulator) {
+         if (inst->writes_flag()) {
+            inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
+         } else {
+               inst->remove();
+         }
+         return true;
       }
 
-      return true;
+      return false;
    } else if (inst->dst.writemask != new_writemask) {
       switch (inst->opcode) {
       case SHADER_OPCODE_TXF_CMS:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 8fa0aee..32d305f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1372,7 +1372,8 @@ vec4_visitor::visit(ir_expression *ir)
          } else {
             struct brw_reg acc = retype(brw_acc_reg(), result_dst.type);
 
-            emit(MUL(acc, op[0], op[1]));
+            vec4_instruction *mul = emit(MUL(dst_null_d(), op[0], op[1]));
+            mul->writes_accumulator = true;
             emit(MACH(dst_null_d(), op[0], op[1]));
             emit(MOV(result_dst, src_reg(acc)));
          }
@@ -1381,9 +1382,8 @@ vec4_visitor::visit(ir_expression *ir)
       }
       break;
    case ir_binop_imul_high: {
-      struct brw_reg acc = retype(brw_acc_reg(), result_dst.type);
-
-      emit(MUL(acc, op[0], op[1]));
+      vec4_instruction *mul = emit(MUL(dst_null_d(), op[0], op[1]));
+      mul->writes_accumulator = true;
       emit(MACH(result_dst, op[0], op[1]));
       break;
    }
-- 
1.8.1.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to