From: Michel Dänzer <michel.daen...@amd.com> This reverts commit bfd55711c1eb32ffbfceb9d566abae98f0015f23.
It makes piglit unreliable due to VM protection faults and GPU lockups. Signed-off-by: Michel Dänzer <michel.daen...@amd.com> --- Not sure why I wasn't seeing the piglit unreliability when I tested the patch before it was applied... I'm not sure how the patch could cause those issues directly, so it may be pre-existing bugs merely exposed by it. But either way, we need to make sure piglit works reliably with this before we can apply it again. src/gallium/drivers/radeon/AMDGPU.h | 1 - src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp | 1 - .../drivers/radeon/AMDILCFGStructurizer.cpp | 2 - src/gallium/drivers/radeon/AMDILInstrInfo.td | 2 +- src/gallium/drivers/radeon/Makefile.sources | 1 - src/gallium/drivers/radeon/SIISelLowering.cpp | 2 +- src/gallium/drivers/radeon/SILowerFlowControl.cpp | 161 -------------------- 7 files changed, 2 insertions(+), 168 deletions(-) delete mode 100644 src/gallium/drivers/radeon/SILowerFlowControl.cpp diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h index f484caa..fe36545 100644 --- a/src/gallium/drivers/radeon/AMDGPU.h +++ b/src/gallium/drivers/radeon/AMDGPU.h @@ -25,7 +25,6 @@ FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); // SI Passes FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); -FunctionPass *createSILowerFlowControlPass(TargetMachine &tm); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm); diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp index 0b70191..c1b6840 100644 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp +++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp @@ -134,7 +134,6 @@ bool AMDGPUPassConfig::addPreEmitPass() { addPass(FinalizeMachineBundlesID); } else { PM->add(createSILowerLiteralConstantsPass(*TM)); - PM->add(createSILowerFlowControlPass(*TM)); } return false; diff --git 
a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp index 20e27ef..b167d62 100644 --- a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp +++ b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp @@ -2892,8 +2892,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> switch (instr->getOpcode()) { case AMDGPU::JUMP: return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0; - case AMDGPU::BRANCH: - return true; default: return false; } diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.td b/src/gallium/drivers/radeon/AMDILInstrInfo.td index 050a5bd..f8771af 100644 --- a/src/gallium/drivers/radeon/AMDILInstrInfo.td +++ b/src/gallium/drivers/radeon/AMDILInstrInfo.td @@ -211,7 +211,7 @@ include "AMDILIntrinsics.td" // Custom Inserter for Branches and returns, this eventually will be a // seperate pass //===---------------------------------------------------------------------===// -let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { +let isTerminator = 1, usesCustomInserter = 1 in { def BRANCH : ILFormat<(outs), (ins brtarget:$target), "; Pseudo unconditional branch instruction", [(br bb:$target)]>; diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index c5d1207..4b0699b 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -71,7 +71,6 @@ CPP_SOURCES := \ SIInstrInfo.cpp \ SIISelLowering.cpp \ SILowerLiteralConstants.cpp \ - SILowerFlowControl.cpp \ SIMachineFunctionInfo.cpp \ SIRegisterInfo.cpp \ InstPrinter/AMDGPUInstPrinter.cpp \ diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp index 7c2739c..3f23949 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ b/src/gallium/drivers/radeon/SIISelLowering.cpp @@ -78,7 +78,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( switch 
(MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); - case AMDGPU::BRANCH: return BB; + case AMDGPU::CLAMP_SI: BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) .addOperand(MI->getOperand(0)) diff --git a/src/gallium/drivers/radeon/SILowerFlowControl.cpp b/src/gallium/drivers/radeon/SILowerFlowControl.cpp deleted file mode 100644 index bf5192e..0000000 --- a/src/gallium/drivers/radeon/SILowerFlowControl.cpp +++ /dev/null @@ -1,161 +0,0 @@ -//===-- SILowerFlowControl.cpp - Use predicates for flow control ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass lowers the pseudo flow control instructions (SI_IF_NZ, ELSE, ENDIF) -// to predicated instructions. -// -// All flow control (except loops) is handled using predicated instructions and -// a predicate stack. Each Scalar ALU controls the operations of 64 Vector -// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs -// by writting to the 64-bit EXEC register (each bit corresponds to a -// single vector ALU). Typically, for predicates, a vector ALU will write -// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each -// Vector ALU) and then the ScalarALU will AND the VCC register with the -// EXEC to update the predicates. -// -// For example: -// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2 -// SI_IF_NZ %VCC -// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 -// ELSE -// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 -// ENDIF -// -// becomes: -// -// %SGPR0 = S_MOV_B64 %EXEC // Save the current exec mask -// %EXEC = S_AND_B64 %VCC, %EXEC // Update the exec mask -// S_CBRANCH_EXECZ label0 // This instruction is an -// // optimization which allows us to -// // branch if all the bits of -// // EXEC are zero. 
-// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch -// -// label0: -// %EXEC = S_NOT_B64 %EXEC // Invert the exec mask for the -// // Then block. -// %EXEC = S_AND_B64 %SGPR0, %EXEC -// S_BRANCH_EXECZ label1 // Use our branch optimization -// // instruction again. -// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR // Do the THEN block -// label1: -// S_MOV_B64 // Restore the old EXEC value -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "SIInstrInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -using namespace llvm; - -namespace { - -class SILowerFlowControlPass : public MachineFunctionPass { - -private: - static char ID; - const TargetInstrInfo *TII; - std::vector<unsigned> PredicateStack; - std::vector<unsigned> UnusedRegisters; - - void pushExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); - void popExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); - -public: - SILowerFlowControlPass(TargetMachine &tm) : - MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { - return "SI Lower flow control instructions"; - } - -}; - -} // End anonymous namespace - -char SILowerFlowControlPass::ID = 0; - -FunctionPass *llvm::createSILowerFlowControlPass(TargetMachine &tm) { - return new SILowerFlowControlPass(tm); -} - -bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) { - - // Find all the unused registers that can be used for the predicate stack. 
- for (TargetRegisterClass::iterator S = AMDGPU::SReg_64RegClass.begin(), - I = AMDGPU::SReg_64RegClass.end(); - I != S; --I) { - unsigned Reg = *I; - if (!MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { - UnusedRegisters.push_back(Reg); - } - } - - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next, Next = llvm::next(I)) { - MachineInstr &MI = *I; - switch (MI.getOpcode()) { - default: break; - case AMDGPU::SI_IF_NZ: - pushExecMask(MBB, I); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64), - AMDGPU::EXEC) - .addOperand(MI.getOperand(0)) // VCC - .addReg(AMDGPU::EXEC); - MI.eraseFromParent(); - break; - case AMDGPU::ELSE: - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_NOT_B64), - AMDGPU::EXEC) - .addReg(AMDGPU::EXEC); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64), - AMDGPU::EXEC) - .addReg(PredicateStack.back()) - .addReg(AMDGPU::EXEC); - MI.eraseFromParent(); - break; - case AMDGPU::ENDIF: - popExecMask(MBB, I); - MI.eraseFromParent(); - break; - } - } - } - return false; -} - -void SILowerFlowControlPass::pushExecMask(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { - - assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack"); - unsigned StackReg = UnusedRegisters.back(); - UnusedRegisters.pop_back(); - PredicateStack.push_back(StackReg); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64), - StackReg) - .addReg(AMDGPU::EXEC); -} - -void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { - unsigned StackReg = PredicateStack.back(); - PredicateStack.pop_back(); - UnusedRegisters.push_back(StackReg); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64), - AMDGPU::EXEC) - .addReg(StackReg); -} -- 1.7.10.4 
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev