It's a nice optimisation; it also makes the machine code easier to read. Reviewed-by: Vincent Lejeune <vljn at ovi.com>
----- Mail original ----- > De : Tom Stellard <t...@stellard.net> > À : mesa-dev@lists.freedesktop.org > Cc : Tom Stellard <thomas.stell...@amd.com> > Envoyé le : Mercredi 28 novembre 2012 23h50 > Objet : [Mesa-dev] [PATCH 5/5] R600: Fold immediates into ALU instructions > when possible v2 > > From: Tom Stellard <thomas.stell...@amd.com> > > v2: > - Fold the immediates using the SelectionDAG > --- > lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 94 +++++++++++++++++++++++++ > lib/Target/AMDGPU/R600InstrInfo.cpp | 16 ++++- > lib/Target/AMDGPU/R600InstrInfo.h | 7 ++ > test/CodeGen/R600/fcmp-cnd.ll | 4 +- > test/CodeGen/R600/fcmp-cnde-int-args.ll | 2 +- > test/CodeGen/R600/literals.ll | 30 ++++++++ > test/CodeGen/R600/selectcc-icmp-select-float.ll | 2 +- > test/CodeGen/R600/selectcc_cnde.ll | 2 +- > test/CodeGen/R600/selectcc_cnde_int.ll | 2 +- > 9 files changed, 153 insertions(+), 6 deletions(-) > create mode 100644 test/CodeGen/R600/literals.ll > > diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp > b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp > index 10ce6ad..2a80f1b 100644 > --- a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp > +++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp > @@ -14,6 +14,7 @@ > #include "AMDGPUISelLowering.h" // For AMDGPUISD > #include "AMDGPURegisterInfo.h" > #include "AMDILDevices.h" > +#include "R600InstrInfo.h" > #include "llvm/ADT/ValueMap.h" > #include "llvm/CodeGen/PseudoSourceValue.h" > #include "llvm/CodeGen/SelectionDAGISel.h" > @@ -167,6 +168,99 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { > } > } > break; > + case ISD::ConstantFP: > + case ISD::Constant: > + { > + const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); > + // XXX: Custom immediate lowering not implemented yet. 
Instead we use > + // pseudo instructions defined in SIInstructions.td > + if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { > + break; > + } > + const R600InstrInfo *TII = static_cast<const > R600InstrInfo*>(TM.getInstrInfo()); > + > + uint64_t ImmValue = 0; > + unsigned ImmReg = AMDGPU::ALU_LITERAL_X; > + > + if (N->getOpcode() == ISD::ConstantFP) { > + // XXX: 64-bit Immediates not supported yet > + assert(N->getValueType(0) != MVT::f64); > + > + ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N); > + APFloat Value = C->getValueAPF(); > + float FloatValue = Value.convertToFloat(); > + if (FloatValue == 0.0) { > + ImmReg = AMDGPU::ZERO; > + } else if (FloatValue == 0.5) { > + ImmReg = AMDGPU::HALF; > + } else if (FloatValue == 1.0) { > + ImmReg = AMDGPU::ONE; > + } else { > + ImmValue = Value.bitcastToAPInt().getZExtValue(); > + } > + } else { > + // XXX: 64-bit Immediates not supported yet > + assert(N->getValueType(0) != MVT::i64); > + > + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); > + if (C->getZExtValue() == 0) { > + ImmReg = AMDGPU::ZERO; > + } else if (C->getZExtValue() == 1) { > + ImmReg = AMDGPU::ONE_INT; > + } else { > + ImmValue = C->getZExtValue(); > + } > + } > + > + for (SDNode::use_iterator Use = N->use_begin(), E = SDNode::use_end(); > + Use != E; ++Use) { > + std::vector<SDValue> Ops; > + for (unsigned i = 0; i < Use->getNumOperands(); ++i) { > + Ops.push_back(Use->getOperand(i)); > + } > + > + if (!Use->isMachineOpcode()) { > + if (ImmReg == AMDGPU::ALU_LITERAL_X) { > + // We can only use literal constants (e.g. AMDGPU::ZERO, > + // AMDGPU::ONE, etc) in machine opcodes. 
> + continue; > + } > + } else { > + if (!TII->isALUInstr(Use->getMachineOpcode())) { > + continue; > + } > + > + int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), > R600Operands::IMM); > + assert(ImmIdx != -1); > + > + // subtract one from ImmIdx, because the DST operand is usually > index > + // 0 for MachineInstrs, but we have no DST in the Ops vector. > + ImmIdx--; > + > + // Check that we aren't already using an immediate. > + // XXX: It's possible for an instruction to have more than one > + // immediate operand, but this is not supported yet. > + if (ImmReg == AMDGPU::ALU_LITERAL_X) { > + ConstantSDNode *C = > dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx)); > + assert(C); > + > + if (C->getZExtValue() != 0) { > + // This instruction is already using an immediate. > + continue; > + } > + > + // Set the immediate value > + Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32); > + } > + } > + // Set the immediate register > + Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32); > + > + CurDAG->UpdateNodeOperands(*Use, Ops.data(), > Use->getNumOperands()); > + } > + break; > + } > + > } > return SelectCode(N); > } > diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp > b/lib/Target/AMDGPU/R600InstrInfo.cpp > index 20b1aa3..814e0a2 100644 > --- a/lib/Target/AMDGPU/R600InstrInfo.cpp > +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp > @@ -127,6 +127,15 @@ bool R600InstrInfo::isCubeOp(unsigned Opcode) const { > } > } > > +bool R600InstrInfo::isALUInstr(unsigned Opcode) const > +{ > + unsigned TargetFlags = get(Opcode).TSFlags; > + > + return ((TargetFlags & R600_InstFlag::OP1) | > + (TargetFlags & R600_InstFlag::OP2) | > + (TargetFlags & R600_InstFlag::OP3)); > +} > + > DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine > *TM, > const ScheduleDAG *DAG) const { > const InstrItineraryData *II = TM->getInstrItineraryData(); > @@ -505,6 +514,11 @@ MachineInstr > *R600InstrInfo::buildMovImm(MachineBasicBlock > &BB, > > int 
R600InstrInfo::getOperandIdx(const MachineInstr &MI, > R600Operands::Ops Op) const { > + return getOperandIdx(MI.getOpcode(), Op); > +} > + > +int R600InstrInfo::getOperandIdx(unsigned Opcode, > + R600Operands::Ops Op) const { > const static int OpTable[3][R600Operands::COUNT] = { > // W C S S S S S S S S > // R O D L S R R R S R R R S R R L P > @@ -515,7 +529,7 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI, > {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17}, > {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14} > }; > - unsigned TargetFlags = get(MI.getOpcode()).TSFlags; > + unsigned TargetFlags = get(Opcode).TSFlags; > unsigned OpTableIdx; > > if (!HAS_NATIVE_OPERANDS(TargetFlags)) { > diff --git a/lib/Target/AMDGPU/R600InstrInfo.h > b/lib/Target/AMDGPU/R600InstrInfo.h > index cec1c3b..81e1828 100644 > --- a/lib/Target/AMDGPU/R600InstrInfo.h > +++ b/lib/Target/AMDGPU/R600InstrInfo.h > @@ -50,6 +50,9 @@ namespace llvm { > bool isReductionOp(unsigned opcode) const; > bool isCubeOp(unsigned opcode) const; > > + /// isALUInstr - Returns true if this Opcode represents an ALU instruction. > + bool isALUInstr(unsigned Opcode) const; > + > /// isVector - Vector instructions are instructions that must fill all > /// instruction slots within an instruction group. > bool isVector(const MachineInstr &MI) const; > @@ -130,6 +133,10 @@ namespace llvm { > /// if the Instruction does not contain the specified Op. > int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const; > > + /// getOperandIdx - Get the index of Op for the given Opcode. Returns -1 > + /// if the Instruction does not contain the specified Op. > + int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const; > + > /// setImmOperand - Helper function for setting instruction flag values. 
> void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) > const; > > diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll > index c6b6236..a94cfb5 100644 > --- a/test/CodeGen/R600/fcmp-cnd.ll > +++ b/test/CodeGen/R600/fcmp-cnd.ll > @@ -1,6 +1,8 @@ > ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s > > -;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], > T[0-9]+\.[XYZW]}} > +;Not checking arguments 2 and 3 to CNDE, because they may change between > +;registers and literal.x depending on what the optimizer does. > +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} > > define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { > entry: > diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll > b/test/CodeGen/R600/fcmp-cnde-int-args.ll > index 92f3b5f..5c981ef 100644 > --- a/test/CodeGen/R600/fcmp-cnde-int-args.ll > +++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll > @@ -4,7 +4,7 @@ > ; chance to optimize the fcmp + select instructions to CNDE was missed > ; due to the fact that the operands to fcmp and select had different types > > -;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], > T[0-9]+\.[XYZW]}} > +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, 0.0, -1}} > > define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { > entry: > diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll > new file mode 100644 > index 0000000..4c731b2 > --- /dev/null > +++ b/test/CodeGen/R600/literals.ll > @@ -0,0 +1,30 @@ > +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s > + > +; Test using an integer literal constant. > +; Generated ASM should be: > +; ADD_INT REG literal.x, 5 > +; or > +; ADD_INT literal.x REG, 5 > + > +; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. 
]*}} 5 > +define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { > +entry: > + %0 = add i32 5, %in > + store i32 %0, i32 addrspace(1)* %out > + ret void > +} > + > +; Test using a float literal constant. > +; Generated ASM should be: > +; ADD REG literal.x, 5.0 > +; or > +; ADD literal.x REG, 5.0 > + > +; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0 > +define void @float_literal(float addrspace(1)* %out, float %in) { > +entry: > + %0 = fadd float 5.0, %in > + store float %0, float addrspace(1)* %out > + ret void > +} > + > diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll > b/test/CodeGen/R600/selectcc-icmp-select-float.ll > index f1f8ab1..f65a300 100644 > --- a/test/CodeGen/R600/selectcc-icmp-select-float.ll > +++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll > @@ -2,7 +2,7 @@ > > ; Note additional optimizations may cause this SGT to be replaced with a > ; CND* instruction. > -; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], > T[0-9]+\.[XYZW]}} > +; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}} > ; Test a selectcc with i32 LHS/RHS and float True/False > > define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) { > diff --git a/test/CodeGen/R600/selectcc_cnde.ll > b/test/CodeGen/R600/selectcc_cnde.ll > index e06a170..f0a0f51 100644 > --- a/test/CodeGen/R600/selectcc_cnde.ll > +++ b/test/CodeGen/R600/selectcc_cnde.ll > @@ -1,7 +1,7 @@ > ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s > > ;CHECK-NOT: SETE > -;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], > T[0-9]+\.[XYZW]}} > +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, > [-0-9]+\(2.0}} > define void @test(float addrspace(1)* %out, float addrspace(1)* %in) { > %1 = load float addrspace(1)* %in > %2 = fcmp oeq float %1, 0.0 > diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll > b/test/CodeGen/R600/selectcc_cnde_int.ll > index 03d000f..b38078e 100644 > --- 
a/test/CodeGen/R600/selectcc_cnde_int.ll > +++ b/test/CodeGen/R600/selectcc_cnde_int.ll > @@ -1,7 +1,7 @@ > ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s > > ;CHECK-NOT: SETE_INT > -;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], > T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} > +;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}} > define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { > %1 = load i32 addrspace(1)* %in > %2 = icmp eq i32 %1, 0 > -- > 1.7.11.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev