From: Tom Stellard <thomas.stell...@amd.com> --- lib/Target/AMDGPU/R600ISelLowering.cpp | 65 ++++++++++++++++++++++--- lib/Target/AMDGPU/R600InstrInfo.cpp | 40 +++++++++++++++ lib/Target/AMDGPU/R600InstrInfo.h | 12 +++++ test/CodeGen/R600/fcmp-cnd.ll | 4 +- test/CodeGen/R600/fcmp-cnde-int-args.ll | 2 +- test/CodeGen/R600/selectcc-icmp-select-float.ll | 2 +- test/CodeGen/R600/selectcc_cnde.ll | 2 +- test/CodeGen/R600/selectcc_cnde_int.ll | 2 +- 8 files changed, 117 insertions(+), 12 deletions(-)
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 75a2a90..dc2247a 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -131,15 +131,66 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( } case AMDGPU::MOV_IMM_F32: - TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), - MI->getOperand(1).getFPImm()->getValueAPF() - .bitcastToAPInt().getZExtValue()); - break; case AMDGPU::MOV_IMM_I32: - TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), - MI->getOperand(1).getImm()); - break; + { + unsigned DstReg = MI->getOperand(0).getReg(); + bool CanReplace = true; + int64_t Imm; + unsigned ImmReg = AMDGPU::ALU_LITERAL_X; + bool IsInlineConstant = false; + + if (MI->getOpcode() == AMDGPU::MOV_IMM_F32) { + APFloat Value = MI->getOperand(1).getFPImm()->getValueAPF(); + float FloatValue = Value.convertToFloat(); + if (FloatValue == 0.0f) { + ImmReg = AMDGPU::ZERO; + IsInlineConstant = true; + } else if (FloatValue == 1.0f) { + ImmReg = AMDGPU::ONE; + IsInlineConstant = true; + } + Imm = Value.bitcastToAPInt().getZExtValue(); + } else { + Imm = MI->getOperand(1).getImm(); + switch (Imm) { + case 0: + ImmReg = AMDGPU::ZERO; + IsInlineConstant = true; + break; + case 1: + ImmReg = AMDGPU::ONE_INT; + IsInlineConstant = true; + break; + } + } + + // Check all the uses of DstReg to make sure it is safe to fold the + // immediate. + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(DstReg); + UI != MachineRegisterInfo::use_end(); ++UI) { + if (!TII->canUseImmediate(UI.getOperand()) && + !(IsInlineConstant && TII->isALUInstr((*UI).getOpcode()))) { + CanReplace = false; + break; + } + } + + if (!CanReplace) { + TII->buildMovImm(*BB, I, DstReg, Imm); + } else { + if (!IsInlineConstant) { + // Add the immediate to the use instructions + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(DstReg); + UI != MachineRegisterInfo::use_end(); ++UI) { + TII->setImmOperand(&*UI, R600Operands::IMM, Imm); + } + } + // Update the use's register + MRI.replaceRegWith(DstReg, ImmReg); + } + break; + } case AMDGPU::RAT_WRITE_CACHELESS_32_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index e437313..2ca5e45 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -138,6 +138,15 @@ bool R600InstrInfo::isCubeOp(unsigned Opcode) const } } +bool R600InstrInfo::isALUInstr(unsigned Opcode) const +{ + unsigned TargetFlags = get(Opcode).TSFlags; + + return ((TargetFlags & R600_InstFlag::OP1) | + (TargetFlags & R600_InstFlag::OP2) | + (TargetFlags & R600_InstFlag::OP3)); +} + DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const { @@ -582,6 +591,37 @@ void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op, MI->getOperand(Idx).setImm(Imm); } +bool R600InstrInfo::operandUsesImmediate(const MachineInstr &MI, + R600Operands::Ops Op) const +{ + int OpIndex = getOperandIdx(MI, Op); + return OpIndex != -1 && + MI.getOperand(OpIndex).getReg() == AMDGPU::ALU_LITERAL_X; +} + +bool R600InstrInfo::canUseImmediate(const MachineOperand &MO) const +{ + const MachineInstr *MI = MO.getParent(); + + if (!isALUInstr(MI->getOpcode())) { + return false; + } + + // XXX: Handle cases when an instruction can reference an immediate from + // multiple operands: + // + // 1. Each operand uses the same immediate value + // 2. There is room in the instruction group (MIBundle) for another immediate. + // We need to start using bundles first. + if (operandUsesImmediate(*MI, R600Operands::SRC0) || + operandUsesImmediate(*MI, R600Operands::SRC1) || + operandUsesImmediate(*MI, R600Operands::SRC2)) { + return false; + } + + return true; +} + //===----------------------------------------------------------------------===// // Instruction flag getters/setters //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h index cec1c3b..7c62a53 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.h +++ b/lib/Target/AMDGPU/R600InstrInfo.h @@ -50,6 +50,9 @@ namespace llvm { bool isReductionOp(unsigned opcode) const; bool isCubeOp(unsigned opcode) const; + /// isALUInstr - Returns true if this Opcode represents an ALU instruction. + bool isALUInstr(unsigned Opcode) const; + /// isVector - Vector instructions are instructions that must fill all /// instruction slots within an instruction group. bool isVector(const MachineInstr &MI) const; @@ -133,6 +136,15 @@ namespace llvm { /// setImmOperand - Helper function for setting instruction flag values. void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const; + /// operandUsesImmediate - Return true if it is legal for this instruction + /// to reference an immediate from one of its opreand and the given operand + /// is using an immediate. + bool operandUsesImmediate(const MachineInstr &MI, R600Operands::Ops Op) const; + + /// canUseImmediate - Return true if this operand can be replaced with an + /// immediate. + bool canUseImmediate(const MachineOperand &MO) const; + ///hasFlagOperand - Returns true if this instruction has an operand for /// storing target flags. bool hasFlagOperand(const MachineInstr &MI) const; diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll index c6b6236..a94cfb5 100644 --- a/test/CodeGen/R600/fcmp-cnd.ll +++ b/test/CodeGen/R600/fcmp-cnd.ll @@ -1,6 +1,8 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;Not checking arguments 2 and 3 to CNDE, because they may change between +;registers and literal.x depending on what the optimizer does. +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { entry: diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll index 92f3b5f..5c981ef 100644 --- a/test/CodeGen/R600/fcmp-cnde-int-args.ll +++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll @@ -4,7 +4,7 @@ ; chance to optimize the fcmp + select instructions to CNDE was missed ; due to the fact that the operands to fcmp and select had different types -;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, 0.0, -1}} define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { entry: diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll index f1f8ab1..f65a300 100644 --- a/test/CodeGen/R600/selectcc-icmp-select-float.ll +++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll @@ -2,7 +2,7 @@ ; Note additional optimizations may cause this SGT to be replaced with a ; CND* instruction. -; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}} ; Test a selectcc with i32 LHS/RHS and float True/False define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) { diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc_cnde.ll index e06a170..f0a0f51 100644 --- a/test/CodeGen/R600/selectcc_cnde.ll +++ b/test/CodeGen/R600/selectcc_cnde.ll @@ -1,7 +1,7 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK-NOT: SETE -;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}} define void @test(float addrspace(1)* %out, float addrspace(1)* %in) { %1 = load float addrspace(1)* %in %2 = fcmp oeq float %1, 0.0 diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc_cnde_int.ll index 03d000f..b38078e 100644 --- a/test/CodeGen/R600/selectcc_cnde_int.ll +++ b/test/CodeGen/R600/selectcc_cnde_int.ll @@ -1,7 +1,7 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK-NOT: SETE_INT -;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}} define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %1 = load i32 addrspace(1)* %in %2 = icmp eq i32 %1, 0 -- 1.7.11.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev