[Mesa-dev] PATCH: R600/SI: Instruction verifier improvements

Tom Stellard Wed, 12 Mar 2014 09:50:35 -0700

Hi,

The attached patches add some more checks to the instruction verifier on SI
and also fix some false positives.  This should fix all the verifier errors
generated by the piglit tests.


As far as I can tell, there are no regressions with this series.  I had some 
trouble
making it through a full run on my Boanire system without hangs, so I would 
feel better
if someone else was able to test these.

-Tom

>From a31bd001e5b2705c32edce4950bb403c1dc1f010 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stell...@amd.com>
Date: Tue, 11 Mar 2014 10:11:55 -0400
Subject: [PATCH 1/3] R600/SI: Add generic checks to
 SIInstrInfo::verifyInstruction()

Added checks for number of operands and operand register classes.
---
 lib/Target/R600/SIInstrInfo.cpp | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index a239fb9..1f0a1a8 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -382,6 +382,47 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
   int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
   int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
 
+  // Make sure the number of operands is correct.
+  const MCInstrDesc &Desc = get(Opcode);
+  if (!Desc.isVariadic() &&
+      Desc.getNumOperands() != MI->getNumExplicitOperands()) {
+     ErrInfo = "Instruction has wrong number of operands.";
+     return false;
+  }
+
+  // Make sure the register classes are correct
+  for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
+    switch (Desc.OpInfo[i].OperandType) {
+    case MCOI::OPERAND_REGISTER:
+      break;
+    case MCOI::OPERAND_IMMEDIATE:
+      if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) {
+        ErrInfo = "Expected immediate, but got non-immediate";
+        return false;
+      }
+      // Fall-through
+    default:
+      continue;
+    }
+
+    if (!MI->getOperand(i).isReg())
+      continue;
+
+    int RegClass = Desc.OpInfo[i].RegClass;
+    if (RegClass != -1) {
+      unsigned Reg = MI->getOperand(i).getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg))
+        continue;
+
+      const TargetRegisterClass *RC = RI.getRegClass(RegClass);
+      if (!RC->contains(Reg)) {
+        ErrInfo = "Operand has incorrect register class.";
+        return false;
+      }
+    }
+  }
+
+
   // Verify VOP*
   if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
     unsigned ConstantBusCount = 0;
-- 
1.8.1.5

>From 8abee76f5376a7bf5ef5f210493b58b18810902d Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stell...@amd.com>
Date: Tue, 11 Mar 2014 17:28:57 -0400
Subject: [PATCH 2/3] R600/SI: Use correct dest register class for
 V_READFIRSTLANE_B32

This instructions writes to an 32-bit SGPR.  This change required adding
the 32-bit VCC_LO and VCC_HI registers, because the full VCC register
is 64 bits.

This fixes verifier errors on several of the indirect addressing piglit
tests.
---
 lib/Target/R600/AMDGPUAsmPrinter.cpp   |  3 ++-
 lib/Target/R600/SIInstructions.td      | 13 ++++++++++++-
 lib/Target/R600/SILowerControlFlow.cpp |  5 +++--
 lib/Target/R600/SIRegisterInfo.td      | 13 +++++++++++--
 test/CodeGen/R600/private-memory.ll    |  4 ++--
 5 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index ccc3d13..b166c45 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -210,7 +210,8 @@ void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF,
           continue;
         }
         unsigned reg = MO.getReg();
-        if (reg == AMDGPU::VCC) {
+        if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
+	    reg == AMDGPU::VCC_HI) {
           VCCUsed = true;
           continue;
         }
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 03e954f..9a18f7b 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -631,7 +631,18 @@ let neverHasSideEffects = 1, isMoveImm = 1 in {
 defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
 } // End neverHasSideEffects = 1, isMoveImm = 1
 
-defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
+let Uses = [EXEC] in {
+
+def V_READFIRSTLANE_B32 : VOP1 <
+  0x00000002,
+  (outs SReg_32:$vdst),
+  (ins VReg_32:$src0),
+  "V_READFIRSTLANE_B32 $vdst, $src0",
+  []
+>;
+
+}
+
 defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64",
   [(set i32:$dst, (fp_to_sint f64:$src0))]
 >;
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 5ec4930..182f28b 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -345,12 +345,13 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
           .addReg(AMDGPU::EXEC);
 
   // Read the next variant into VCC (lower 32 bits) <- also loop target
-  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
+  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+          AMDGPU::VCC_LO)
           .addReg(Idx);
 
   // Move index from VCC into M0
   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
-          .addReg(AMDGPU::VCC);
+          .addReg(AMDGPU::VCC_LO);
 
   // Compare the just read M0 value to all possible Idx values
   BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 49bdbc9..65cf311 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -17,7 +17,16 @@ class SIReg <string n, bits<16> encoding = 0> : Register<n> {
 }
 
 // Special Registers
-def VCC : SIReg<"VCC", 106>;
+def VCC_LO : SIReg<"vcc_lo", 106>;
+def VCC_HI : SIReg<"vcc_hi", 107>;
+
+// VCC for 64-bit instructions
+def VCC : RegisterWithSubRegs<"VCC", [VCC_LO, VCC_HI]> {
+  let Namespace = "AMDGPU";
+  let SubRegIndices = [sub0, sub1];
+  let HWEncoding = 106;
+}
+
 def EXEC : SIReg<"EXEC", 126>;
 def SCC : SIReg<"SCC", 253>;
 def M0 : SIReg <"M0", 124>;
@@ -150,7 +159,7 @@ def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
 
 // Register class for all scalar registers (SGPRs + Special Registers)
 def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
-  (add SGPR_32, M0Reg)
+  (add SGPR_32, M0Reg, VCC_LO)
 >;
 
 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64], 64, (add SGPR_64Regs)>;
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
index e22c718..4920320 100644
--- a/test/CodeGen/R600/private-memory.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -13,10 +13,10 @@
 ; R600-CHECK-NOT: ALU clause
 ; R600-CHECK: 0 + AR.x
 
-; SI-CHECK: V_READFIRSTLANE
+; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo
 ; SI-CHECK: V_MOVRELD
 ; SI-CHECK: S_CBRANCH
-; SI-CHECK: V_READFIRSTLANE
+; SI-CHECK: V_READFIRSTLANE_B32 vcc_lo
 ; SI-CHECK: V_MOVRELD
 ; SI-CHECK: S_CBRANCH
 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
-- 
1.8.1.5

>From 06d120450658a78f4a2ab6c8906f7a3873a3bb55 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stell...@amd.com>
Date: Tue, 11 Mar 2014 18:13:04 -0400
Subject: [PATCH 3/3] R600/SI: Fix implementation of isInlineConstant() used by
 the verifier

The type of the immediates should not matter as long as the encoding is
equivalent to the encoding of one of the legal inline constants.
---
 lib/Target/R600/SIInstrInfo.cpp | 39 +++++++++++++++++++++++++--------------
 test/CodeGen/R600/v_cndmask.ll  | 13 +++++++++++++
 2 files changed, 38 insertions(+), 14 deletions(-)
 create mode 100644 test/CodeGen/R600/v_cndmask.ll

diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 1f0a1a8..e2e9f56 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -354,21 +354,32 @@ bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
 }
 
 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
-  if(MO.isImm()) {
-    return MO.getImm() >= -16 && MO.getImm() <= 64;
-  }
-  if (MO.isFPImm()) {
-    return MO.getFPImm()->isExactlyValue(0.0)  ||
-           MO.getFPImm()->isExactlyValue(0.5)  ||
-           MO.getFPImm()->isExactlyValue(-0.5) ||
-           MO.getFPImm()->isExactlyValue(1.0)  ||
-           MO.getFPImm()->isExactlyValue(-1.0) ||
-           MO.getFPImm()->isExactlyValue(2.0)  ||
-           MO.getFPImm()->isExactlyValue(-2.0) ||
-           MO.getFPImm()->isExactlyValue(4.0)  ||
-           MO.getFPImm()->isExactlyValue(-4.0);
+
+  union {
+    int32_t I;
+    float F;
+  } Imm;
+
+  if (MO.isImm()) {
+    Imm.I = MO.getImm();
+  } else if (MO.isFPImm()) {
+    Imm.F = MO.getFPImm()->getValueAPF().convertToFloat();
+  } else {
+    return false;
   }
-  return false;
+
+  // The actual type of the operand does not seem to matter as long
+  // as the bits match one of the inline immediate values.  For example:
+  //
+  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
+  // so it is a legal inline immediate.
+  //
+  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
+  // floating-point, so it is a legal inline immediate.
+  return (Imm.I >= -16 && Imm.I <= 64) ||
+          Imm.F == 0.0f || Imm.F == 0.5f || Imm.F == -0.5f || Imm.F == 1.0f ||
+          Imm.F == -1.0f || Imm.F == 2.0f || Imm.F == -2.0f || Imm.F == 4.0f ||
+          Imm.F == -4.0f;
 }
 
 bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
diff --git a/test/CodeGen/R600/v_cndmask.ll b/test/CodeGen/R600/v_cndmask.ll
new file mode 100644
index 0000000..9253f76
--- /dev/null
+++ b/test/CodeGen/R600/v_cndmask.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s
+
+; SI: @v_cnd_nan
+; SI: V_CNDMASK_B32_e64 v{{[0-9]}},
+; SI-DAG: v{{[0-9]}}
+; SI-DAG: -nan
+define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) {
+entry:
+  %0 = icmp ne i32 %c, 0
+  %1 = select i1 %0, float 0xFFFFFFFFE0000000, float %f
+  store float %1, float addrspace(1)* %out
+  ret void
+}
-- 
1.8.1.5

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] PATCH: R600/SI: Instruction verifier improvements

Reply via email to