[llvm-branch-commits] [llvm] ffc3e80 - [NFC] [DAGCombine] Correct the result for sqrt even the iteration is zero
Author: QingShan Zhang
Date: 2021-01-25T04:02:44Z
New Revision: ffc3e800c65ee58166255ff897f8b7e6d850ddda

URL: https://github.com/llvm/llvm-project/commit/ffc3e800c65ee58166255ff897f8b7e6d850ddda
DIFF: https://github.com/llvm/llvm-project/commit/ffc3e800c65ee58166255ff897f8b7e6d850ddda.diff

LOG: [NFC] [DAGCombine] Correct the result for sqrt even the iteration is zero

For now, we correct the result for sqrt only if the iteration count is
greater than zero. This doesn't make sense, as the two are not strictly
related.

Reviewed By: dmgreen, spatel, RKSimon

Differential Revision: https://reviews.llvm.org/D94480

Added:

Modified:
    llvm/include/llvm/CodeGen/TargetLowering.h
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed:

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 5a237074a5a3..1bc5377e6863 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4287,9 +4287,7 @@ class TargetLowering : public TargetLoweringBase {
   /// comparison may check if the operand is NAN, INF, zero, normal, etc. The
   /// result should be used as the condition operand for a select or branch.
   virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
-                                   const DenormalMode &Mode) const {
-    return SDValue();
-  }
+                                   const DenormalMode &Mode) const;
 
   /// Return a target-dependent result if the input operand is not suitable for
   /// use with a square root estimate calculation.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2ebf7c6ba0f3..cb273a6f299c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22275,43 +22275,21 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                         Reciprocal)) {
     AddToWorklist(Est.getNode());
 
-    if (Iterations) {
+    if (Iterations)
       Est = UseOneConstNR
                 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
                 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
-
-      if (!Reciprocal) {
-        SDLoc DL(Op);
-        EVT CCVT = getSetCCResultType(VT);
-        SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
-        DenormalMode DenormMode = DAG.getDenormalMode(VT);
-        // Try the target specific test first.
-        SDValue Test = TLI.getSqrtInputTest(Op, DAG, DenormMode);
-        if (!Test) {
-          // If no test provided by target, testing it with denormal inputs to
-          // avoid wrong estimate.
-          if (DenormMode.Input == DenormalMode::IEEE) {
-            // This is specifically a check for the handling of denormal inputs,
-            // not the result.
-
-            // Test = fabs(X) < SmallestNormal
-            const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
-            APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
-            SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
-            SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
-            Test = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
-          } else
-            // Test = X == 0.0
-            Test = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
-        }
-
-        // The estimate is now completely wrong if the input was exactly 0.0 or
-        // possibly a denormal. Force the answer to 0.0 or value provided by
-        // target for those cases.
-        Est = DAG.getNode(
-            Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
-            Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
-      }
+    if (!Reciprocal) {
+      SDLoc DL(Op);
+      // Try the target specific test first.
+      SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
+
+      // The estimate is now completely wrong if the input was exactly 0.0 or
+      // possibly a denormal. Force the answer to 0.0 or value provided by
+      // target for those cases.
+      Est = DAG.getNode(
+          Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+          Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
     }
     return Est;
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 80b745e0354a..7858bc6c43e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5841,6 +5841,28 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { retu
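The point of the restructuring above is easy to miss in the diff: the 0.0/denormal fixup select used to be emitted only when at least one Newton-Raphson refinement step was requested, yet the raw estimate sqrt(x) ~= x * rsqrt(x) is already wrong for x == 0.0 with zero iterations. A standalone sketch of the failure mode (plain C++ with an emulated rsqrt estimate; illustrative names, not LLVM code):

// Why the guard cannot be gated on the iteration count.
#include <cmath>
#include <cstdio>

float sqrtViaEstimate(float X, int Iterations) {
  float Est = 1.0f / std::sqrt(X); // hardware-style rsqrt estimate; +inf at X == 0.0
  for (int I = 0; I < Iterations; ++I)
    Est = Est * (1.5f - 0.5f * X * Est * Est); // Newton-Raphson refinement step
  return X * Est; // sqrt(X) ~= X * rsqrt(X); 0 * inf = NaN at X == 0.0
}

int main() {
  // NaN for X == 0.0 whether Iterations is 0 or 1, so the 0.0/denormal
  // correction has to be applied even when no refinement steps run.
  std::printf("%f %f\n", sqrtViaEstimate(0.0f, 0), sqrtViaEstimate(0.0f, 1));
}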
[llvm-branch-commits] [llvm] 2962f11 - [NFC] Add the getSizeInBytes() interface for MachineConstantPoolValue
Author: QingShan Zhang
Date: 2021-01-05T03:22:45Z
New Revision: 2962f1149c8fccf8e865654ce11b3f1312165651

URL: https://github.com/llvm/llvm-project/commit/2962f1149c8fccf8e865654ce11b3f1312165651
DIFF: https://github.com/llvm/llvm-project/commit/2962f1149c8fccf8e865654ce11b3f1312165651.diff

LOG: [NFC] Add the getSizeInBytes() interface for MachineConstantPoolValue

The current implementation assumes that each MachineConstantPoolValue takes
up sizeof(MachineConstantPoolValue::Ty) bytes. For PowerPC, we want to lump
all the constants with the same type into one MachineConstantPoolValue, to
save the cost of calculating the TOC entry for each constant. So we need to
extend MachineConstantPoolValue in a way that breaks this assumption.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D89108

Added:

Modified:
    llvm/include/llvm/CodeGen/MachineConstantPool.h
    llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
    llvm/lib/CodeGen/MachineFunction.cpp
    llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
    llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
    llvm/lib/Target/X86/X86MCInstLower.cpp

Removed:

diff --git a/llvm/include/llvm/CodeGen/MachineConstantPool.h b/llvm/include/llvm/CodeGen/MachineConstantPool.h
index cfc9ca88c976..a9bc0ce300b2 100644
--- a/llvm/include/llvm/CodeGen/MachineConstantPool.h
+++ b/llvm/include/llvm/CodeGen/MachineConstantPool.h
@@ -41,10 +41,10 @@ class MachineConstantPoolValue {
   explicit MachineConstantPoolValue(Type *ty) : Ty(ty) {}
   virtual ~MachineConstantPoolValue() = default;
 
-  /// getType - get type of this MachineConstantPoolValue.
-  ///
   Type *getType() const { return Ty; }
 
+  virtual unsigned getSizeInBytes(const DataLayout &DL) const;
+
   virtual int getExistingMachineCPValue(MachineConstantPool *CP,
                                         Align Alignment) = 0;
 
@@ -94,7 +94,7 @@ class MachineConstantPoolEntry {
 
   Align getAlign() const { return Alignment; }
 
-  Type *getType() const;
+  unsigned getSizeInBytes(const DataLayout &DL) const;
 
   /// This method classifies the entry according to whether or not it may
   /// generate a relocation entry.
This must be conservative, so if it might diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 6732c35e2094..85a5d0c59b83 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1970,8 +1970,7 @@ void AsmPrinter::emitConstantPool() { unsigned NewOffset = alignTo(Offset, CPE.getAlign()); OutStreamer->emitZeros(NewOffset - Offset); - Type *Ty = CPE.getType(); - Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty); + Offset = NewOffset + CPE.getSizeInBytes(getDataLayout()); OutStreamer->emitLabel(Sym); if (CPE.isMachineConstantPoolEntry()) diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 1eb191465ac9..3f44578b1a2c 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1107,10 +1107,14 @@ Printable llvm::printJumpTableEntryReference(unsigned Idx) { void MachineConstantPoolValue::anchor() {} -Type *MachineConstantPoolEntry::getType() const { +unsigned MachineConstantPoolValue::getSizeInBytes(const DataLayout &DL) const { + return DL.getTypeAllocSize(Ty); +} + +unsigned MachineConstantPoolEntry::getSizeInBytes(const DataLayout &DL) const { if (isMachineConstantPoolEntry()) -return Val.MachineCPVal->getType(); - return Val.ConstVal->getType(); +return Val.MachineCPVal->getSizeInBytes(DL); + return DL.getTypeAllocSize(Val.ConstVal->getType()); } bool MachineConstantPoolEntry::needsRelocation() const { @@ -1123,7 +1127,7 @@ SectionKind MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { if (needsRelocation()) return SectionKind::getReadOnlyWithRel(); - switch (DL->getTypeAllocSize(getType())) { + switch (getSizeInBytes(*DL)) { case 4: return SectionKind::getMergeableConst4(); case 8: diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index da7bf6170255..886bc2965969 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -514,7 +514,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector &CPEMIs) const DataLayout &TD = MF->getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { -unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); +unsigned Size = CPs[i].getSizeInBytes(TD); Align Alignment = CPs[i].getAlign(); // Verify that all constant pool entries are a multiple of their alignment. // If not, we would have to pad them out so that instructions stay aligned. diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib
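To make the interface change concrete, here is a standalone model (plain C++ stand-ins, not the real LLVM classes): the size of a pool entry becomes a virtual query whose default preserves the old sizeof-the-type behaviour, while a PPC-style subclass can report the size of several lumped constants sharing one TOC entry:

#include <cstdio>

struct ConstantPoolValue {
  unsigned TypeSize; // stand-in for DataLayout::getTypeAllocSize(Ty)
  explicit ConstantPoolValue(unsigned TS) : TypeSize(TS) {}
  virtual ~ConstantPoolValue() = default;
  // Default keeps the old "one value = one type's worth of bytes" assumption.
  virtual unsigned getSizeInBytes() const { return TypeSize; }
};

// A PPC-style entry holding N same-typed constants behind one pool entry.
struct LumpedPoolValue : ConstantPoolValue {
  unsigned NumConstants;
  LumpedPoolValue(unsigned TS, unsigned N)
      : ConstantPoolValue(TS), NumConstants(N) {}
  unsigned getSizeInBytes() const override { return TypeSize * NumConstants; }
};

int main() {
  LumpedPoolValue V(/*TypeSize=*/8, /*NumConstants=*/4);
  const ConstantPoolValue &CPV = V;
  // Emitters (offset layout, section kind) now ask the entry, not the type.
  std::printf("%u bytes, not %u\n", CPV.getSizeInBytes(), CPV.TypeSize);
}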
[llvm-branch-commits] [llvm] 7539c75 - [DAGCombine] Remove the check for unsafe-fp-math when we are checking the AFN
Author: QingShan Zhang
Date: 2021-01-11T02:25:53Z
New Revision: 7539c75bb438f185575573ed4ea8da7cb37d3f2a

URL: https://github.com/llvm/llvm-project/commit/7539c75bb438f185575573ed4ea8da7cb37d3f2a
DIFF: https://github.com/llvm/llvm-project/commit/7539c75bb438f185575573ed4ea8da7cb37d3f2a.diff

LOG: [DAGCombine] Remove the check for unsafe-fp-math when we are checking the AFN

We are checking unsafe-fp-math for sqrt but not for fpow, which is
inconsistent. As the direction is to remove this global option, we need to
remove the unsafe-fp-math check for sqrt and update the tests with the afn
fast-math flag.

Reviewed By: Spatel

Differential Revision: https://reviews.llvm.org/D93891

Added:

Modified:
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/fneg-combines.ll
    llvm/test/CodeGen/AMDGPU/frem.ll
    llvm/test/CodeGen/NVPTX/fast-math.ll
    llvm/test/CodeGen/NVPTX/sqrt-approx.ll
    llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
    llvm/test/CodeGen/X86/sqrt-fastmath.ll

Removed:

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 09c8f7219390..be57d9250db7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13918,7 +13918,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
-  if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
+  if (!Flags.hasApproximateFuncs() ||
       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
     return SDValue();
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 23e817eb51cb..e68b4e6c2cd6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8172,8 +8172,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
   EVT VT = Op.getValueType();
   const SDNodeFlags Flags = Op->getFlags();
 
-  bool AllowInaccurateRcp = DAG.getTarget().Options.UnsafeFPMath ||
-                            Flags.hasApproximateFuncs();
+  bool AllowInaccurateRcp = Flags.hasApproximateFuncs();
 
   // Without !fpmath accuracy information, we can't do more because we don't
   // know exactly whether rcp is accurate enough to meet !fpmath requirement.
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll index de10dae7ee9f..eaf4232335ec 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -252,7 +252,7 @@ define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i ; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]] define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #2 { .entry: - %tmp7 = fdiv float 1.00e+00, %tmp6 + %tmp7 = fdiv afn float 1.00e+00, %tmp6 %tmp8 = fmul float 0.00e+00, %tmp7 %tmp9 = fmul reassoc nnan arcp contract float 0.00e+00, %tmp8 %.i188 = fadd float %tmp9, 0.00e+00 diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index ef19917cc45f..46974c2f38d3 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -297,7 +297,7 @@ define amdgpu_kernel void @unsafe_frem_f16(half addrspace(1)* %out, half addrspa %gep2 = getelementptr half, half addrspace(1)* %in2, i32 4 %r0 = load half, half addrspace(1)* %in1, align 4 %r1 = load half, half addrspace(1)* %gep2, align 4 - %r2 = frem half %r0, %r1 + %r2 = frem afn half %r0, %r1 store half %r2, half addrspace(1)* %out, align 4 ret void } @@ -576,7 +576,7 @@ define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrs %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4 %r0 = load float, float addrspace(1)* %in1, align 4 %r1 = load float, float addrspace(1)* %gep2, align 4 - %r2 = frem float %r0, %r1 + %r2 = frem afn float %r0, %r1 store float %r2, float addrspace(1)* %out, align 4 ret void } @@ -924,7 +924,7 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add double addrspace(1)* %in2) #1 { %r0 = load double, double addrspace(1)* %in1, align 8 %r1 = load double, double addrspace(1)* %in2, align 8 - %r2 = frem double %r0, %r1 + %r2 = frem afn double %r0, %r1 store double %r2, double addrspace(1)* %out, align 8 ret void } diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll index db5fb63f4e76..1f300fecb131 100644 --- a/llvm/test/CodeGen/NVPTX/fast-math.ll +++ b/llvm/test/CodeGen/NVPTX/fast-math.ll @@ -25,7 +25,7 @@ def
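The behavioural change in the two hunks above can be summarised as a predicate: the approximation path is now keyed on the per-node 'afn' flag alone rather than 'afn' OR the global unsafe-fp-math switch. A standalone sketch, with plain structs standing in for SDNodeFlags/TargetOptions (illustrative, not DAGCombiner code):

#include <cstdio>

struct NodeFlags { bool ApproximateFuncs, NoInfs; };
struct Options { bool UnsafeFPMath, NoInfsFPMath; };

bool allowSqrtEstimate(NodeFlags F, Options O) {
  // Before: (O.UnsafeFPMath || F.ApproximateFuncs) &&
  //         (O.NoInfsFPMath || F.NoInfs)
  return F.ApproximateFuncs && (O.NoInfsFPMath || F.NoInfs);
}

int main() {
  // A node without 'afn' no longer gets an estimate merely because the
  // global unsafe-fp-math option is on -- hence the test updates adding
  // 'afn' to the fdiv/frem instructions above.
  std::printf("%d\n", allowSqrtEstimate({false, true}, {true, false})); // 0
  std::printf("%d\n", allowSqrtEstimate({true, true}, {false, false})); // 1
}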
[llvm-branch-commits] [llvm] ebdd20f - Expand the fp_to_int/int_to_fp/fp_round/fp_extend as libcall for fp128
Author: QingShan Zhang
Date: 2020-12-17T07:59:30Z
New Revision: ebdd20f430c408e200d5c60ef957e777841f0fa3

URL: https://github.com/llvm/llvm-project/commit/ebdd20f430c408e200d5c60ef957e777841f0fa3
DIFF: https://github.com/llvm/llvm-project/commit/ebdd20f430c408e200d5c60ef957e777841f0fa3.diff

LOG: Expand the fp_to_int/int_to_fp/fp_round/fp_extend as libcall for fp128

X86 and AArch64 expand these as libcalls inside the target, and PowerPC also
wants to expand them as libcalls for P8. So, implement this in the legalizer
to common up the logic, and remove the code from X86/AArch64 to avoid
duplication.

Reviewed By: Craig Topper

Differential Revision: https://reviews.llvm.org/D91331

Added:

Modified:
    llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86ISelLowering.h
    llvm/test/CodeGen/AArch64/arm64-fp128.ll
    llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
    llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
    llvm/test/CodeGen/PowerPC/f128-conv.ll
    llvm/test/CodeGen/PowerPC/f128-rounding.ll
    llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
    llvm/test/CodeGen/X86/fp128-load.ll

Removed:

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7342c663776c..ef151a60a35c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1812,6 +1812,19 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
 SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
                                                EVT DestVT, const SDLoc &dl,
                                                SDValue Chain) {
+  unsigned SrcSize = SrcOp.getValueSizeInBits();
+  unsigned SlotSize = SlotVT.getSizeInBits();
+  unsigned DestSize = DestVT.getSizeInBits();
+  Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+  Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType);
+
+  // Don't convert with stack if the load/store is expensive.
+  if ((SrcSize > SlotSize &&
+       !TLI.isTruncStoreLegalOrCustom(SrcOp.getValueType(), SlotVT)) ||
+      (SlotSize < DestSize &&
+       !TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, DestVT, SlotVT)))
+    return SDValue();
+
   // Create the stack frame object.
   Align SrcAlign = DAG.getDataLayout().getPrefTypeAlign(
       SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
@@ -1822,12 +1835,6 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
   MachinePointerInfo PtrInfo =
       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
 
-  unsigned SrcSize = SrcOp.getValueSizeInBits();
-  unsigned SlotSize = SlotVT.getSizeInBits();
-  unsigned DestSize = DestVT.getSizeInBits();
-  Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
-  Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType);
-
   // Emit a store to the stack slot. Use a truncstore if the input value is
   // later than DestVT.
   SDValue Store;
@@ -2415,7 +2422,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
   // TODO: Should any fast-math-flags be set for the created nodes?
LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n"); - if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { + if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64) && + (DestVT.bitsLE(MVT::f64) || + TLI.isOperationLegal(Node->isStrictFPOpcode() ? ISD::STRICT_FP_EXTEND + : ISD::FP_EXTEND, +DestVT))) { LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " "expansion\n"); @@ -2477,8 +2488,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, } return Result; } - // Code below here assumes !isSigned without checking again. - assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + + if (isSigned) +return SDValue(); // TODO: Generalize this for use with other types. if (((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) || @@ -2537,6 +2549,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, return DAG.getSelect(dl, DestVT, SignBitTest, Slow, Fast); } + // Don't expand it if there isn't cheap fadd. + if (!TLI.isOperationLegalOrCustom( + Node->isStrictFPOpcode() ? ISD::STRICT_FADD : ISD::FADD, DestVT)) +return SDValue(); + // The following op
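The new bail-outs share one shape: return no result when a needed operation is not cheap, so the caller falls back to a libcall (which is how fp128 conversions end up in compiler-rt routines on P8). A standalone model of the EmitStackConvert check, with an illustrative struct standing in for the TLI legality queries (sketch only, not the legalizer API):

#include <cstdio>

struct TLIModel {
  bool TruncStoreLegal; // models TLI.isTruncStoreLegalOrCustom(SrcVT, SlotVT)
  bool ExtLoadLegal;    // models TLI.isLoadExtLegalOrCustom(EXTLOAD, DestVT, SlotVT)
};

// Mirrors the shape of the check added to EmitStackConvert: refuse the
// stack round-trip when either half of it would be expensive.
bool canStackConvert(unsigned SrcBits, unsigned SlotBits, unsigned DstBits,
                     const TLIModel &TLI) {
  if (SrcBits > SlotBits && !TLI.TruncStoreLegal)
    return false; // storing would need an expensive truncstore
  if (SlotBits < DstBits && !TLI.ExtLoadLegal)
    return false; // reloading would need an expensive extload
  return true;
}

int main() {
  // e.g. converting through a 32-bit slot up to a 128-bit value when the
  // target has no legal extload for that pair:
  TLIModel TLI{false, false};
  std::printf(canStackConvert(32, 32, 128, TLI) ? "stack\n" : "libcall\n");
}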
[llvm-branch-commits] [llvm] 385e9a2 - [DAGCombiner] Improve shift by select of constant
Author: Layton Kifer Date: 2020-12-18T02:21:42Z New Revision: 385e9a2a047bc0bee13a21a9016763e694a686a3 URL: https://github.com/llvm/llvm-project/commit/385e9a2a047bc0bee13a21a9016763e694a686a3 DIFF: https://github.com/llvm/llvm-project/commit/385e9a2a047bc0bee13a21a9016763e694a686a3.diff LOG: [DAGCombiner] Improve shift by select of constant Clean up a TODO, to support folding a shift of a constant by a select of constants, on targets with different shift operand sizes. Reviewed By: RKSimon, lebedev.ri Differential Revision: https://reviews.llvm.org/D90349 Added: Modified: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/test/CodeGen/AArch64/select_const.ll llvm/test/CodeGen/PowerPC/select_const.ll llvm/test/CodeGen/X86/dagcombine-select.ll Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 212e0a2ea988..74d3e1adcd6c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2150,16 +2150,7 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { !isConstantFPBuildVectorOrConstantFP(CBO)) return SDValue(); - EVT VT = Sel.getValueType(); - - // In case of shift value and shift amount may have diff erent VT. For instance - // on x86 shift amount is i8 regardles of LHS type. Bail out if we have - // swapped operands and value types do not match. NB: x86 is fine if operands - // are not swapped with shift amount VT being not bigger than shifted value. - // TODO: that is possible to check for a shift operation, correct VTs and - // still perform optimization on x86 if needed. - if (SelOpNo && VT != CBO.getValueType()) -return SDValue(); + EVT VT = BO->getValueType(0); // We have a select-of-constants followed by a binary operator with a // constant. Eliminate the binop by pulling the constant math into the select. 
diff --git a/llvm/test/CodeGen/AArch64/select_const.ll b/llvm/test/CodeGen/AArch64/select_const.ll index 945e7cdc35ad..f58232e2ee89 100644 --- a/llvm/test/CodeGen/AArch64/select_const.ll +++ b/llvm/test/CodeGen/AArch64/select_const.ll @@ -437,10 +437,9 @@ define i8 @shl_constant_sel_constants(i1 %cond) { ; CHECK-LABEL: shl_constant_sel_constants: ; CHECK: // %bb.0: ; CHECK-NEXT:tst w0, #0x1 -; CHECK-NEXT:mov w8, #2 -; CHECK-NEXT:cinc x8, x8, eq -; CHECK-NEXT:mov w9, #1 -; CHECK-NEXT:lsl w0, w9, w8 +; CHECK-NEXT:mov w8, #8 +; CHECK-NEXT:mov w9, #4 +; CHECK-NEXT:csel w0, w9, w8, ne ; CHECK-NEXT:ret %sel = select i1 %cond, i8 2, i8 3 %bo = shl i8 1, %sel @@ -463,10 +462,9 @@ define i8 @lshr_constant_sel_constants(i1 %cond) { ; CHECK-LABEL: lshr_constant_sel_constants: ; CHECK: // %bb.0: ; CHECK-NEXT:tst w0, #0x1 -; CHECK-NEXT:mov w8, #2 -; CHECK-NEXT:cinc x8, x8, eq -; CHECK-NEXT:mov w9, #64 -; CHECK-NEXT:lsr w0, w9, w8 +; CHECK-NEXT:mov w8, #8 +; CHECK-NEXT:mov w9, #16 +; CHECK-NEXT:csel w0, w9, w8, ne ; CHECK-NEXT:ret %sel = select i1 %cond, i8 2, i8 3 %bo = lshr i8 64, %sel @@ -488,10 +486,9 @@ define i8 @ashr_constant_sel_constants(i1 %cond) { ; CHECK-LABEL: ashr_constant_sel_constants: ; CHECK: // %bb.0: ; CHECK-NEXT:tst w0, #0x1 -; CHECK-NEXT:mov w8, #2 -; CHECK-NEXT:cinc x8, x8, eq -; CHECK-NEXT:mov w9, #-128 -; CHECK-NEXT:asr w0, w9, w8 +; CHECK-NEXT:mov w8, #-16 +; CHECK-NEXT:mov w9, #-32 +; CHECK-NEXT:csel w0, w9, w8, ne ; CHECK-NEXT:ret %sel = select i1 %cond, i8 2, i8 3 %bo = ashr i8 128, %sel diff --git a/llvm/test/CodeGen/PowerPC/select_const.ll b/llvm/test/CodeGen/PowerPC/select_const.ll index 7e8b6297ed3c..804cc7736bf8 100644 --- a/llvm/test/CodeGen/PowerPC/select_const.ll +++ b/llvm/test/CodeGen/PowerPC/select_const.ll @@ -610,13 +610,24 @@ define i8 @sel_constants_shl_constant(i1 %cond) { } define i8 @shl_constant_sel_constants(i1 %cond) { -; ALL-LABEL: shl_constant_sel_constants: -; ALL: # %bb.0: -; ALL-NEXT:clrlwi 3, 3, 31 -; ALL-NEXT:li 4, 1 -; ALL-NEXT:subfic 3, 3, 3 -; ALL-NEXT:slw 3, 4, 3 -; ALL-NEXT:blr +; ISEL-LABEL: shl_constant_sel_constants: +; ISEL: # %bb.0: +; ISEL-NEXT:andi. 3, 3, 1 +; ISEL-NEXT:li 4, 4 +; ISEL-NEXT:li 3, 8 +; ISEL-NEXT:iselgt 3, 4, 3 +; ISEL-NEXT:blr +; +; NO_ISEL-LABEL: shl_constant_sel_constants: +; NO_ISEL: # %bb.0: +; NO_ISEL-NEXT:andi. 3, 3, 1 +; NO_ISEL-NEXT:li 4, 4 +; NO_ISEL-NEXT:li 3, 8 +; NO_ISEL-NEXT:bc 12, 1, .LBB37_1 +; NO_ISEL-NEXT:blr +; NO_ISEL-NEXT: .LBB37_1: +; NO_ISEL-NEXT:addi 3, 4, 0 +; NO_ISEL-NEXT:blr %sel = select i1 %cond, i8 2, i8 3 %bo = shl i8 1, %sel ret i8 %bo @@ -647,13 +658,24 @@ define i8 @sel_constants_lshr_constant(i1 %cond) { } define i8 @lshr_constant_sel_constants(i1 %co
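The fold itself is simple constant math: both arms of the select are shifted at compile time, so only a select of the two folded results survives, even when the shift-amount type differs from the shifted-value type. A standalone before/after illustration (plain C++, mirroring the shl test above, not DAG code):

#include <cstdio>

unsigned char before(bool Cond) {
  unsigned char Sel = Cond ? 2 : 3;
  return (unsigned char)(1 << Sel); // shift survives to codegen
}

unsigned char after(bool Cond) {
  return Cond ? 4 : 8; // shl 1, (select c, 2, 3) -> select c, 4, 8
}

int main() {
  for (bool C : {false, true})
    std::printf("%u %u\n", before(C), after(C)); // identical results
}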
[llvm-branch-commits] [llvm] 477b650 - [PowerPC] Select the D-Form load if we know its offset meets the requirement
Author: QingShan Zhang
Date: 2020-12-18T07:27:26Z
New Revision: 477b6505fa1d49339c81fbbda937dc8bb5e53cfd

URL: https://github.com/llvm/llvm-project/commit/477b6505fa1d49339c81fbbda937dc8bb5e53cfd
DIFF: https://github.com/llvm/llvm-project/commit/477b6505fa1d49339c81fbbda937dc8bb5e53cfd.diff

LOG: [PowerPC] Select the D-Form load if we know its offset meets the requirement

The LD/STD-like instructions are selected only when the alignment of the
load/store is >= 4, to handle the case where the offset might not be known
(i.e. relocations). That means we have to select the X-Form load for

  %0 = load i64, i64* %arrayidx, align 2

In fact, we can still select the D-Form load if the offset is known. So we
only query the load/store alignment when we don't know whether the offset is
a multiple of 4.

Reviewed By: jji, Nemanjai

Differential Revision: https://reviews.llvm.org/D93099

Added:

Modified:
    llvm/lib/Target/PowerPC/PPCInstr64Bit.td
    llvm/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/test/CodeGen/PowerPC/ldst-align.ll
    llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
    llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
    llvm/test/CodeGen/PowerPC/pr45186.ll
    llvm/test/CodeGen/PowerPC/store-combine.ll
    llvm/test/CodeGen/PowerPC/unal4-std.ll
    llvm/test/CodeGen/PowerPC/unaligned.ll

Removed:

diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 9265c513c031..e19ea6a07a0d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1062,7 +1062,7 @@ def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
 def LWA  : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
                     "lwa $rD, $src", IIC_LdStLWA,
                     [(set i64:$rD,
-                          (aligned4sextloadi32 iaddrX4:$src))]>, isPPC64,
+                          (DSFormSextLoadi32 iaddrX4:$src))]>, isPPC64,
                     PPC970_DGroup_Cracked;
 let Interpretation64Bit = 1, isCodeGenOnly = 1 in
 def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
@@ -1173,7 +1173,7 @@ def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
 let PPC970_Unit = 2 in {
 def LD   : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
                     "ld $rD, $src", IIC_LdStLD,
-                    [(set i64:$rD, (aligned4load iaddrX4:$src))]>, isPPC64;
+                    [(set i64:$rD, (DSFormLoad iaddrX4:$src))]>, isPPC64;
 // The following four definitions are selected for small code model only.
 // Otherwise, we need to create two instructions to form a 32-bit offset,
 // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
@@ -1380,7 +1380,7 @@ def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
 // Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst), "std $rS, $dst", IIC_LdStSTD, -[(aligned4store i64:$rS, iaddrX4:$dst)]>, isPPC64; +[(DSFormStore i64:$rS, iaddrX4:$dst)]>, isPPC64; def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst), "stdx $rS, $dst", IIC_LdStSTD, [(store i64:$rS, xaddrX4:$dst)]>, isPPC64, @@ -1447,7 +1447,7 @@ def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>; def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>; -def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), +def : Pat<(DSFormPreStore i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), (STDU $rS, iaddroff:$ptroff, $ptrreg)>; def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), @@ -1591,11 +1591,11 @@ def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)), // Patterns to match r+r indexed loads and stores for // addresses without at least 4-byte alignment. -def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)), +def : Pat<(i64 (NonDSFormSextLoadi32 xoaddr:$src)), (LWAX xoaddr:$src)>; -def : Pat<(i64 (unaligned4load xoaddr:$src)), +def : Pat<(i64 (NonDSFormLoad xoaddr:$src)), (LDX xoaddr:$src)>; -def : Pat<(unaligned4store i64:$rS, xoaddr:$dst), +def : Pat<(NonDSFormStore i64:$rS, xoaddr:$dst), (STDX $rS, xoaddr:$dst)>; // 64-bits atomic loads and stores diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 849b96f507bd..018fb8ffe16c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -495,37 +495,41 @@ def imm64ZExt32 : Operand, ImmLeaf(Imm); }]>; -// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require +// This is a somewhat weaker condition than actually checking for
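The selection rule being changed reduces to a small predicate: a DS-form encoding requires an offset that is a multiple of 4, and alignment is only a proxy used when the offset is unknown (e.g. a relocation). A standalone sketch with illustrative types (std::optional models "offset known or not"; not the real predicate code from PPCInstrInfo.td):

#include <cstdint>
#include <cstdio>
#include <optional>

bool canUseDSForm(std::optional<int64_t> KnownOffset, uint64_t Alignment) {
  if (KnownOffset)
    return *KnownOffset % 4 == 0; // offset known: check it directly
  return Alignment >= 4;          // offset unknown: fall back to alignment
}

int main() {
  // 'load i64, i64* %arrayidx, align 2' at a known offset of 8:
  // DS-form (ld) is now fine despite the weak alignment.
  std::printf("%d\n", canUseDSForm(8, 2));  // 1 -> ld
  std::printf("%d\n", canUseDSForm({}, 2)); // 0 -> ldx
}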
[llvm-branch-commits] [llvm] fa42f08 - [PowerPC][FP128] Fix the incorrect calling convention for IEEE long double on Power8
Author: QingShan Zhang Date: 2020-11-25T01:43:48Z New Revision: fa42f08b2643d0a2e53fde8949e7f88b6d965bb8 URL: https://github.com/llvm/llvm-project/commit/fa42f08b2643d0a2e53fde8949e7f88b6d965bb8 DIFF: https://github.com/llvm/llvm-project/commit/fa42f08b2643d0a2e53fde8949e7f88b6d965bb8.diff LOG: [PowerPC][FP128] Fix the incorrect calling convention for IEEE long double on Power8 For now, we are using the GPR to pass the arguments/return value for fp128 on Power8, which is incorrect. It should be VSR. The reason why we do it this way is that, we are setting the fp128 as illegal which make LLVM try to emulate it with i128 on Power8. So, we need to correct it as legal. Reviewed By: Nemanjai Differential Revision: https://reviews.llvm.org/D91527 Added: Modified: llvm/lib/Target/PowerPC/PPCCallingConv.td llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCInstrAltivec.td llvm/test/CodeGen/PowerPC/f128-arith.ll Removed: diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td index 9a15490f1fb0..64de7353c516 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -59,7 +59,7 @@ def RetCC_PPC_Cold : CallingConv<[ CCIfType<[f32], CCAssignToReg<[F1]>>, CCIfType<[f64], CCAssignToReg<[F1]>>, - CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2]>>>, + CCIfType<[f128], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2]>>>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], CCIfSubtarget<"hasAltivec()", @@ -92,7 +92,7 @@ def RetCC_PPC : CallingConv<[ // For P9, f128 are passed in vector registers. CCIfType<[f128], - CCIfSubtarget<"hasP9Vector()", + CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, // Vector types returned as "direct" go into V2 .. V9; note that only the @@ -149,7 +149,7 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[ CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f128], - CCIfSubtarget<"hasP9Vector()", + CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], CCIfSubtarget<"hasAltivec()", @@ -216,7 +216,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[ // Vectors and float128 get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>, - CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToStack<16, 16>>> + CCIfType<[f128], CCIfSubtarget<"hasAltivec()", CCAssignToStack<16, 16>>> ]>; // This calling convention puts vector arguments always on the stack. It is used @@ -238,7 +238,7 @@ def CC_PPC32_SVR4 : CallingConv<[ // Float128 types treated as vector arguments. 
CCIfType<[f128], - CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, + CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>>, CCDelegateTo diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 5b42dbdb9bee..10aecf97fcdf 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -121,6 +121,11 @@ cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden); static cl::opt UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden); +// TODO - Remove this option if soft fp128 has been fully supported . +static cl::opt +EnableSoftFP128("enable-soft-fp128", +cl::desc("temp option to enable soft fp128"), cl::Hidden); + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM"); @@ -1161,6 +1166,32 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::BSWAP, MVT::v4i32, Legal); setOperationAction(ISD::BSWAP, MVT::v2i64, Legal); setOperationAction(ISD::BSWAP, MVT::v1i128, Legal); +} else if (Subtarget.hasAltivec() && EnableSoftFP128) { + addRegisterClass(MVT::f128, &PPC::VRRCRegClass); + + for (MVT FPT : MVT::fp_valuetypes()) +setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand); + + setOperationAction(ISD::LOAD, MVT::f128, Promote); + setOperationAction(ISD::STORE, MVT::f128, Promote); + + AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32); + AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32); + + setOperationActi
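To illustrate what the hasP9Vector() -> hasAltivec() change buys: f128 arguments now land in vector registers V2..V13 on any Altivec-capable subtarget (including P8) instead of being emulated through GPRs. A deliberately toy register assigner, not the real tablegen-generated calling convention:

#include <cstdio>
#include <string>

std::string assignF128(unsigned ArgIdx, bool HasAltivec) {
  if (HasAltivec && ArgIdx < 12)
    return "v" + std::to_string(2 + ArgIdx); // V2..V13, one VSR per f128
  if (!HasAltivec)
    return "gpr pair (old, incorrect i128 emulation)";
  return "stack"; // beyond V13, arguments go to the stack
}

int main() {
  for (unsigned I = 0; I < 3; ++I)
    std::printf("arg%u -> %s\n", I, assignF128(I, /*HasAltivec=*/true).c_str());
}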
[llvm-branch-commits] [llvm] 60c28a5 - [NFC][Test] Format the test for IEEE Long double
Author: QingShan Zhang Date: 2020-11-25T03:00:24Z New Revision: 60c28a5a2b76ebf9c8bac9ebf20ac8fe69c788ee URL: https://github.com/llvm/llvm-project/commit/60c28a5a2b76ebf9c8bac9ebf20ac8fe69c788ee DIFF: https://github.com/llvm/llvm-project/commit/60c28a5a2b76ebf9c8bac9ebf20ac8fe69c788ee.diff LOG: [NFC][Test] Format the test for IEEE Long double Added: Modified: llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll llvm/test/CodeGen/PowerPC/store_fptoi.ll Removed: diff --git a/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll b/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll index ffc626be2dea..26832efb3f4c 100644 --- a/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll +++ b/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll @@ -1,65 +1,197 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mcpu=a2 < %s | FileCheck %s -check-prefix=FPCVT ; RUN: llc -verify-machineinstrs -mcpu=ppc64 < %s | FileCheck %s -check-prefix=PPC64 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s -check-prefix=PWR9 target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" ; Function Attrs: nounwind readnone define float @fool(float %X) #0 { +; FPCVT-LABEL: fool: +; FPCVT: # %bb.0: # %entry +; FPCVT-NEXT:friz 1, 1 +; FPCVT-NEXT:blr +; +; PPC64-LABEL: fool: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT:fctidz 0, 1 +; PPC64-NEXT:fcfid 0, 0 +; PPC64-NEXT:frsp 1, 0 +; PPC64-NEXT:blr +; +; PWR9-LABEL: fool: +; PWR9: # %bb.0: # %entry +; PWR9-NEXT:xsrdpiz 1, 1 +; PWR9-NEXT:blr entry: %conv = fptosi float %X to i64 %conv1 = sitofp i64 %conv to float ret float %conv1 -; FPCVT-LABEL: @fool -; FPCVT: friz 1, 1 -; FPCVT: blr -; PPC64-LABEL: @fool -; PPC64: fctidz [[REG1:[0-9]+]], 1 -; PPC64: fcfid [[REG2:[0-9]+]], [[REG1]] -; PPC64: frsp 1, [[REG2]] -; PPC64: blr } ; Function Attrs: nounwind readnone define double @foodl(double %X) #0 { +; FPCVT-LABEL: foodl: +; FPCVT: # %bb.0: # %entry +; FPCVT-NEXT:friz 1, 1 +; FPCVT-NEXT:blr +; +; PPC64-LABEL: foodl: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT:fctidz 0, 1 +; PPC64-NEXT:fcfid 1, 0 +; PPC64-NEXT:blr +; +; PWR9-LABEL: foodl: +; PWR9: # %bb.0: # %entry +; PWR9-NEXT:xsrdpiz 1, 1 +; PWR9-NEXT:blr entry: %conv = fptosi double %X to i64 %conv1 = sitofp i64 %conv to double ret double %conv1 -; FPCVT-LABEL: @foodl -; FPCVT: friz 1, 1 -; FPCVT: blr -; PPC64-LABEL: @foodl -; PPC64: fctidz [[REG1:[0-9]+]], 1 -; PPC64: fcfid 1, [[REG1]] -; PPC64: blr } ; Function Attrs: nounwind readnone define float @fooul(float %X) #0 { +; FPCVT-LABEL: fooul: +; FPCVT: # %bb.0: # %entry +; FPCVT-NEXT:friz 1, 1 +; FPCVT-NEXT:blr +; +; PPC64-LABEL: fooul: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT:addis 3, 2, .LCPI2_0@toc@ha +; PPC64-NEXT:li 4, 1 +; PPC64-NEXT:lfs 0, .LCPI2_0@toc@l(3) +; PPC64-NEXT:sldi 4, 4, 63 +; PPC64-NEXT:fsubs 2, 1, 0 +; PPC64-NEXT:fcmpu 0, 1, 0 +; PPC64-NEXT:fctidz 2, 2 +; PPC64-NEXT:stfd 2, -8(1) +; PPC64-NEXT:fctidz 2, 1 +; PPC64-NEXT:stfd 2, -16(1) +; PPC64-NEXT:ld 3, -8(1) +; PPC64-NEXT:ld 5, -16(1) +; PPC64-NEXT:xor 3, 3, 4 +; PPC64-NEXT:bc 12, 0, .LBB2_1 +; PPC64-NEXT:b .LBB2_2 +; PPC64-NEXT: .LBB2_1: # %entry +; PPC64-NEXT:addi 3, 5, 0 +; PPC64-NEXT: .LBB2_2: # %entry +; PPC64-NEXT:sradi 4, 3, 53 +; PPC64-NEXT:clrldi 5, 3, 63 +; PPC64-NEXT:addi 4, 4, 1 +; PPC64-NEXT:cmpldi 4, 1 +; PPC64-NEXT:rldicl 4, 3, 63, 1 +; PPC64-NEXT:or 5, 5, 4 +; PPC64-NEXT:rldicl 6, 5, 11, 53 +; PPC64-NEXT:addi 6, 6, 1 +; PPC64-NEXT:clrldi 7, 5, 53 +; PPC64-NEXT:cmpldi 1, 6, 1 +; PPC64-NEXT:clrldi 6, 3, 53 +; PPC64-NEXT:addi 7, 
7, 2047 +; PPC64-NEXT:addi 6, 6, 2047 +; PPC64-NEXT:or 4, 7, 4 +; PPC64-NEXT:or 6, 6, 3 +; PPC64-NEXT:rldicl 4, 4, 53, 11 +; PPC64-NEXT:rldicr 6, 6, 0, 52 +; PPC64-NEXT:bc 12, 1, .LBB2_4 +; PPC64-NEXT: # %bb.3: # %entry +; PPC64-NEXT:ori 6, 3, 0 +; PPC64-NEXT:b .LBB2_4 +; PPC64-NEXT: .LBB2_4: # %entry +; PPC64-NEXT:rldicl 4, 4, 11, 1 +; PPC64-NEXT:cmpdi 3, 0 +; PPC64-NEXT:std 6, -32(1) +; PPC64-NEXT:bc 12, 5, .LBB2_6 +; PPC64-NEXT: # %bb.5: # %entry +; PPC64-NEXT:ori 4, 5, 0 +; PPC64-NEXT:b .LBB2_6 +; PPC64-NEXT: .LBB2_6: # %entry +; PPC64-NEXT:std 4, -24(1) +; PPC64-NEXT:bc 12, 0, .LBB2_8 +; PPC64-NEXT: # %bb.7: # %entry +; PPC64-NEXT:lfd 0, -32(1) +; PPC64-NEXT:fcfid 0, 0 +; PPC64-NEXT:frsp 1, 0 +; PPC64-NEXT:blr +; PPC64-NEXT: .LBB2_8: +; PPC64-NEXT:lfd 0, -24(1) +; PPC64-NEXT:fcfid 0, 0 +; PPC64-NEXT:frsp 0, 0 +; PPC64-NEXT:fadds 1, 0, 0 +; PPC64-NEXT:blr +; +; PWR9-LABEL: fooul: +; PWR9: # %bb.0: # %entry +; PWR9-NEXT:
[llvm-branch-commits] [llvm] 9c588f5 - [DAGCombine] Add hook to allow target specific test for sqrt input
Author: QingShan Zhang
Date: 2020-11-25T05:37:15Z
New Revision: 9c588f53fc423dd0ed69250fbc93b37b40c0ef44

URL: https://github.com/llvm/llvm-project/commit/9c588f53fc423dd0ed69250fbc93b37b40c0ef44
DIFF: https://github.com/llvm/llvm-project/commit/9c588f53fc423dd0ed69250fbc93b37b40c0ef44.diff

LOG: [DAGCombine] Add hook to allow target specific test for sqrt input

PowerPC has instructions like ftsqrt/xstsqrtdp to do the input test for the
software square root. LLVM currently tests the input against the smallest
normalized value using abs + setcc. We should add a hook for targets that
have such test instructions.

Reviewed By: Spatel, Chen Zheng, Qiu Chao Fang

Differential Revision: https://reviews.llvm.org/D80706

Added:

Modified:
    llvm/include/llvm/CodeGen/TargetLowering.h
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h
    llvm/lib/Target/PowerPC/PPCInstrFormats.td
    llvm/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/fma-mutate.ll
    llvm/test/CodeGen/PowerPC/recipest.ll

Removed:

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 164cbd710713..16580a9160b9 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4277,6 +4277,15 @@ class TargetLowering : public TargetLoweringBase {
     return SDValue();
   }
 
+  /// Return a target-dependent comparison result if the input operand is
+  /// suitable for use with a square root estimate calculation. For example, the
+  /// comparison may check if the operand is NAN, INF, zero, normal, etc. The
+  /// result should be used as the condition operand for a select or branch.
+  virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
+                                   const DenormalMode &Mode) const {
+    return SDValue();
+  }
+
   //===----------------------------------------------------------------------===//
   // Legalization utility functions
   //
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cae602d166d1..4ac1743d2d34 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22056,26 +22056,31 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
       // possibly a denormal. Force the answer to 0.0 for those cases.
       SDLoc DL(Op);
       EVT CCVT = getSetCCResultType(VT);
-      ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
+      SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
       DenormalMode DenormMode = DAG.getDenormalMode(VT);
-      if (DenormMode.Input == DenormalMode::IEEE) {
-        // This is specifically a check for the handling of denormal inputs,
-        // not the result.
-
-        // fabs(X) < SmallestNormal ? 0.0 : Est
-        const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
-        APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
-        SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
-        SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
-        SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
-        SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
-        Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
-      } else {
-        // X == 0.0 ? 0.0 : Est
-        SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
-        SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
-        Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
+      // Try the target specific test first.
+SDValue Test = TLI.getSqrtInputTest(Op, DAG, DenormMode); +if (!Test) { + // If no test provided by target, testing it with denormal inputs to + // avoid wrong estimate. + if (DenormMode.Input == DenormalMode::IEEE) { +// This is specifically a check for the handling of denormal inputs, +// not the result. + +// Test = fabs(X) < SmallestNormal +const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); +APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); +SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); +SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); +Test = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); + } else +// Test = X == 0.0 +Test = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); } +// Test ? 0.0 : Est +Est = DAG.getNode(Test.getValueType().isVector() ? ISD::VSELECT + : ISD::SELECT, +
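The default test that the new hook replaces is straightforward to state numerically: under IEEE denormal-input handling the estimate is unsafe for |x| below the smallest normalized value, otherwise only for x == 0.0. A standalone version (plain C++; on PowerPC the hook computes the same predicate with a single ftsqrt/xstsqrtdp instead of fabs + compare):

#include <cmath>
#include <cstdio>
#include <limits>

bool sqrtInputNeedsFixup(float X, bool IEEEDenormalInputs) {
  if (IEEEDenormalInputs)
    // Test = fabs(X) < SmallestNormal
    return std::fabs(X) < std::numeric_limits<float>::min();
  // Denormals are flushed on input; only exact zero is a problem.
  return X == 0.0f;
}

int main() {
  float Denorm = std::numeric_limits<float>::denorm_min();
  std::printf("%d %d %d\n",
              sqrtInputNeedsFixup(0.0f, true),   // 1
              sqrtInputNeedsFixup(Denorm, true), // 1
              sqrtInputNeedsFixup(1.0f, true));  // 0
}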
[llvm-branch-commits] [llvm] 4d83aba - [DAGCombine] Adding a hook to improve the precision of fsqrt if the input is denormal
Author: QingShan Zhang
Date: 2020-11-27T02:10:55Z
New Revision: 4d83aba4228ecb7dfefaf10a36a35f7299467819

URL: https://github.com/llvm/llvm-project/commit/4d83aba4228ecb7dfefaf10a36a35f7299467819
DIFF: https://github.com/llvm/llvm-project/commit/4d83aba4228ecb7dfefaf10a36a35f7299467819.diff

LOG: [DAGCombine] Adding a hook to improve the precision of fsqrt if the input is denormal

For now, we hardcode the result as 0.0 if the input is denormal or 0, which
impacts precision. As the added fsqrt belongs to the cold path of the
cmp+branch, it won't impact performance for normal inputs on PowerPC, but it
improves precision if the input is denormal.

Reviewed By: Spatel

Differential Revision: https://reviews.llvm.org/D80974

Added:

Modified:
    llvm/include/llvm/CodeGen/TargetLowering.h
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h
    llvm/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/fma-mutate.ll
    llvm/test/CodeGen/PowerPC/recipest.ll

Removed:

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 16580a9160b9..4aeefd980d7a 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4286,6 +4286,13 @@ class TargetLowering : public TargetLoweringBase {
     return SDValue();
   }
 
+  /// Return a target-dependent result if the input operand is not suitable for
+  /// use with a square root estimate calculation.
+  virtual SDValue getSqrtResultForDenormInput(SDValue Operand,
+                                              SelectionDAG &DAG) const {
+    return DAG.getConstantFP(0.0, SDLoc(Operand), Operand.getValueType());
+  }
+
   //===----------------------------------------------------------------------===//
   // Legalization utility functions
   //
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4ac1743d2d34..1b5debfe602e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22052,8 +22052,6 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
 
     if (!Reciprocal) {
-      // The estimate is now completely wrong if the input was exactly 0.0 or
-      // possibly a denormal. Force the answer to 0.0 for those cases.
       SDLoc DL(Op);
       EVT CCVT = getSetCCResultType(VT);
       SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
@@ -22077,10 +22075,13 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
         // Test = X == 0.0
         Test = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
       }
-      // Test ? 0.0 : Est
-      Est = DAG.getNode(Test.getValueType().isVector() ? ISD::VSELECT
-                                                       : ISD::SELECT,
-                        DL, VT, Test, FPZero, Est);
+
+      // The estimate is now completely wrong if the input was exactly 0.0 or
+      // possibly a denormal. Force the answer to 0.0 or value provided by
+      // target for those cases.
+      Est = DAG.getNode(
+          Test.getValueType().isVector() ?
ISD::VSELECT : ISD::SELECT, DL, VT, +Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est); } } return Est; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index cf369f5f12c1..2d8dfb63f19c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1441,6 +1441,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; case PPCISD::FTSQRT: return "PPCISD::FTSQRT"; + case PPCISD::FSQRT: +return "PPCISD::FSQRT"; case PPCISD::STFIWX: return "PPCISD::STFIWX"; case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; @@ -12761,6 +12763,17 @@ SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, 0); } +SDValue +PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op, + SelectionDAG &DAG) const { + // TODO - add support for v2f64/v4f32 + EVT VT = Op.getValueType(); + if (VT != MVT::f64) +return TargetLowering::getSqrtResultForDenormInput(Op, DAG); + + return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op); +} + SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps,
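The precision change is visible in a small standalone sketch (illustrative only, with 1/sqrt standing in for the estimate sequence): on the guarded cold path, the zero/denormal answer is no longer forced to 0.0 but can be a real square root supplied by the target:

#include <cmath>
#include <cstdio>
#include <limits>

float guardedSqrt(float X, bool TargetProvidesFSQRT) {
  bool Unsafe = std::fabs(X) < std::numeric_limits<float>::min();
  float Est = X * (1.0f / std::sqrt(X)); // stand-in for the estimate sequence
  if (!Unsafe)
    return Est;                          // hot path: estimate + refinement
  return TargetProvidesFSQRT ? std::sqrt(X) // new: accurate fsqrt (cold path)
                             : 0.0f;        // old: hardcoded 0.0
}

int main() {
  float Denorm = std::numeric_limits<float>::denorm_min();
  std::printf("old=%g new=%g exact=%g\n", guardedSqrt(Denorm, false),
              guardedSqrt(Denorm, true), std::sqrt(Denorm));
}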
[llvm-branch-commits] [llvm] 47f784a - [PowerPC] Promote the i1 to i64 for SINT_TO_FP/FP_TO_SINT
Author: QingShan Zhang
Date: 2020-12-02T05:37:45Z
New Revision: 47f784ace6bb43eb9d95277fcc847fb82abf0f7a

URL: https://github.com/llvm/llvm-project/commit/47f784ace6bb43eb9d95277fcc847fb82abf0f7a
DIFF: https://github.com/llvm/llvm-project/commit/47f784ace6bb43eb9d95277fcc847fb82abf0f7a.diff

LOG: [PowerPC] Promote the i1 to i64 for SINT_TO_FP/FP_TO_SINT

i1 is a native type for PowerPC if crbits is enabled. However, we need to
promote i1 to i64, as we don't have patterns for i1.

Reviewed By: Qiu Chao Fang

Differential Revision: https://reviews.llvm.org/D92067

Added:

Modified:
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/f128-conv.ll
    llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
    llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll

Removed:

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2d8dfb63f19c..1864dc7f3113 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -234,6 +234,20 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
+
+      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
+      AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
+                        isPPC64 ? MVT::i64 : MVT::i32);
+      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
+      AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
+                        isPPC64 ? MVT::i64 : MVT::i32);
+
+      setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+      AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
+                        isPPC64 ? MVT::i64 : MVT::i32);
+      setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+      AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
+                        isPPC64 ? MVT::i64 : MVT::i32);
     } else {
       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
diff --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll
index 29cbe2dd4d3f..f8c6c97106ec 100644
--- a/llvm/test/CodeGen/PowerPC/f128-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll
@@ -181,6 +181,47 @@ entry:
 }
 
+; Function Attrs: norecurse nounwind
+define void @sdwConv2qp_04(fp128* nocapture %a, i1 %b) {
+; CHECK-LABEL: sdwConv2qp_04:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi.
r4, r4, 1 +; CHECK-NEXT:li r4, 0 +; CHECK-NEXT:li r5, -1 +; CHECK-NEXT:iselgt r4, r5, r4 +; CHECK-NEXT:mtvsrwa v2, r4 +; CHECK-NEXT:xscvsdqp v2, v2 +; CHECK-NEXT:stxv v2, 0(r3) +; CHECK-NEXT:blr +; +; CHECK-P8-LABEL: sdwConv2qp_04: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT:mflr r0 +; CHECK-P8-NEXT:.cfi_def_cfa_offset 48 +; CHECK-P8-NEXT:.cfi_offset lr, 16 +; CHECK-P8-NEXT:.cfi_offset r30, -16 +; CHECK-P8-NEXT:std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT:std r0, 16(r1) +; CHECK-P8-NEXT:stdu r1, -48(r1) +; CHECK-P8-NEXT:mr r30, r3 +; CHECK-P8-NEXT:clrldi r3, r4, 63 +; CHECK-P8-NEXT:neg r3, r3 +; CHECK-P8-NEXT:bl __floatsikf +; CHECK-P8-NEXT:nop +; CHECK-P8-NEXT:std r4, 8(r30) +; CHECK-P8-NEXT:std r3, 0(r30) +; CHECK-P8-NEXT:addi r1, r1, 48 +; CHECK-P8-NEXT:ld r0, 16(r1) +; CHECK-P8-NEXT:ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT:mtlr r0 +; CHECK-P8-NEXT:blr +entry: + %conv = sitofp i1 %b to fp128 + store fp128 %conv, fp128* %a, align 16 + ret void + +} + ; Function Attrs: norecurse nounwind define void @udwConv2qp(fp128* nocapture %a, i64 %b) { ; CHECK-LABEL: udwConv2qp: @@ -349,6 +390,43 @@ entry: } +; Function Attrs: norecurse nounwind +define void @udwConv2qp_04(fp128* nocapture %a, i1 %b) { +; CHECK-LABEL: udwConv2qp_04: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:clrlwi r4, r4, 31 +; CHECK-NEXT:mtvsrwa v2, r4 +; CHECK-NEXT:xscvsdqp v2, v2 +; CHECK-NEXT:stxv v2, 0(r3) +; CHECK-NEXT:blr +; +; CHECK-P8-LABEL: udwConv2qp_04: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT:mflr r0 +; CHECK-P8-NEXT:.cfi_def_cfa_offset 48 +; CHECK-P8-NEXT:.cfi_offset lr, 16 +; CHECK-P8-NEXT:.cfi_offset r30, -16 +; CHECK-P8-NEXT:std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT:std r0, 16(r1) +; CHECK-P8-NEXT:stdu r1, -48(r1) +; CHECK-P8-NEXT:mr r30, r3 +; CHECK-P8-NEXT:clrldi r3, r4, 63 +; CHECK-P8-NEXT:bl __floatunsikf +; CHECK-P8-NEXT:nop +; CHECK-P8-NEXT:std r4, 8(r30) +; CHECK-P8-NEXT:std r3, 0(r30) +; CHECK-P8-NEXT:addi r1, r1, 48 +; CHECK-P8-NEXT:ld r0, 16(r1) +; CHECK-P8-NEXT:ld r30, -16(r1) # 8-byte Folded Reload +; C
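The subtlety behind the Promote/AddPromotedToType pairs is the sign-extension semantics of i1: 'sitofp i1 true' must yield -1.0 (hence the li -1 / iselgt sequence in the CHECK lines above), while the unsigned form yields 1.0. In plain C++ terms:

#include <cstdint>
#include <cstdio>

int main() {
  bool B = true;
  int64_t SExt = B ? -1 : 0; // i1 sign-extended to i64, as sitofp i1 requires
  uint64_t ZExt = B ? 1 : 0; // i1 zero-extended to i64, as uitofp i1 requires
  std::printf("sitofp: %g, uitofp: %g\n", (double)SExt, (double)ZExt);
  // Prints: sitofp: -1, uitofp: 1
}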
[llvm-branch-commits] [llvm] 9bf0fea - [PowerPC] Add the hw sqrt test for vector type v4f32/v2f64
Author: QingShan Zhang
Date: 2020-12-03T03:19:18Z
New Revision: 9bf0fea3729e3ad63da24f94ce22c6b4628bec15

URL: https://github.com/llvm/llvm-project/commit/9bf0fea3729e3ad63da24f94ce22c6b4628bec15
DIFF: https://github.com/llvm/llvm-project/commit/9bf0fea3729e3ad63da24f94ce22c6b4628bec15.diff

LOG: [PowerPC] Add the hw sqrt test for vector type v4f32/v2f64

The PowerPC ISA supports the input test for the vector types v4f32 and
v2f64. Replacing the software compare with the hardware test will improve
performance.

Reviewed By: ChenZheng

Differential Revision: https://reviews.llvm.org/D90914

Added:

Modified:
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/recipest.ll

Removed:

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f9f84aa668bc..101ef686c180 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12760,9 +12760,10 @@ static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
                                             const DenormalMode &Mode) const {
-  // TODO - add support for v2f64/v4f32
+  // We only have VSX Vector Test for software Square Root.
   EVT VT = Op.getValueType();
-  if (VT != MVT::f64)
+  if (VT != MVT::f64 &&
+      ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
     return SDValue();
 
   SDLoc DL(Op);
@@ -12788,9 +12789,10 @@ SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
 SDValue
 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
                                                SelectionDAG &DAG) const {
-  // TODO - add support for v2f64/v4f32
+  // We only have VSX Vector Square Root.
   EVT VT = Op.getValueType();
-  if (VT != MVT::f64)
+  if (VT != MVT::f64 &&
+      ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
     return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
 
   return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index e778ca4be6b5..35a0abcfd632 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -640,10 +640,12 @@ let hasSideEffects = 0 in {
 
   def XVTSQRTDP : XX2Form_1<60, 234,
                             (outs crrc:$crD), (ins vsrc:$XB),
-                            "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>;
+                            "xvtsqrtdp $crD, $XB", IIC_FPCompare,
+                            [(set i32:$crD, (PPCftsqrt v2f64:$XB))]>;
   def XVTSQRTSP : XX2Form_1<60, 170,
                             (outs crrc:$crD), (ins vsrc:$XB),
-                            "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>;
+                            "xvtsqrtsp $crD, $XB", IIC_FPCompare,
+                            [(set i32:$crD, (PPCftsqrt v4f32:$XB))]>;
   }
 
   def XVDIVDP : XX3Form<60, 120,
@@ -2464,6 +2466,8 @@ def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, (fneg v4f32:$C)),
           (XVNMADDASP $C, $A, $B)>;
 
 def : Pat<(PPCfsqrt f64:$frA), (XSSQRTDP $frA)>;
+def : Pat<(PPCfsqrt v2f64:$frA), (XVSQRTDP $frA)>;
+def : Pat<(PPCfsqrt v4f32:$frA), (XVSQRTSP $frA)>;
 
 def : Pat<(v2f64 (bitconvert v4f32:$A)),
           (COPY_TO_REGCLASS $A, VSRC)>;
diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index 3d9f2efc32e0..46da4cc6c471 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -953,24 +953,30 @@ define <4 x float> @hoo3_fmf(<4 x float> %a) #1 {
 ;
 ; CHECK-P8-LABEL: hoo3_fmf:
 ; CHECK-P8:       # %bb.0:
+; CHECK-P8-NEXT:    xvtsqrtsp 0, 34
+; CHECK-P8-NEXT:    bc 12, 2, .LBB24_2
+; CHECK-P8-NEXT:  # %bb.1:
 ; CHECK-P8-NEXT:    xvrsqrtesp 0, 34
 ; CHECK-P8-NEXT:    addis 3, 2, .LCPI24_0@toc@ha
 ; CHECK-P8-NEXT:    addis 4, 2,
.LCPI24_1@toc@ha ; CHECK-P8-NEXT:addi 3, 3, .LCPI24_0@toc@l -; CHECK-P8-NEXT:lvx 3, 0, 3 -; CHECK-P8-NEXT:addi 3, 4, .LCPI24_1@toc@l -; CHECK-P8-NEXT:lvx 4, 0, 3 ; CHECK-P8-NEXT:xvmulsp 1, 34, 0 -; CHECK-P8-NEXT:xvmaddasp 35, 1, 0 -; CHECK-P8-NEXT:xvmulsp 0, 1, 36 -; CHECK-P8-NEXT:xxlxor 1, 1, 1 -; CHECK-P8-NEXT:xvcmpeqsp 2, 34, 1 -; CHECK-P8-NEXT:xvmulsp 0, 0, 35 -; CHECK-P8-NEXT:xxsel 34, 0, 1, 2 +; CHECK-P8-NEXT:lvx 2, 0, 3 +; CHECK-P8-NEXT:addi 3, 4, .LCPI24_1@toc@l +; CHECK-P8-NEXT:lvx 3, 0, 3 +; CHECK-P8-NEXT:xvmaddasp 34, 1, 0 +; CHECK-P8-NEXT:xvmulsp 0, 1, 35 +; CHECK-P8-NEXT:xvmulsp 34, 0, 34 +; CHECK-P8-NEXT:blr +; CHECK-P8-NEXT: .LBB24_2: +; CHECK-P8-NEXT:xvsqrtsp 34, 34 ; CHECK-P8-NEXT:blr ; ; CHECK-P9-LABEL: hoo3_fmf: ; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT:xvtsqrtsp 0, 34 +; C
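The generated code pattern above (xvtsqrtsp + bc) reduces a per-lane safety predicate to one CR bit: if any lane is unsafe for the estimate, branch to the full xvsqrtsp; otherwise keep the estimate sequence. A simplified standalone model of the lane test (zero/denormal lanes only; the real instruction checks additional conditions):

#include <array>
#include <cmath>
#include <cstdio>
#include <limits>

bool anyLaneUnsafe(const std::array<float, 4> &V) {
  for (float X : V)
    if (std::fabs(X) < std::numeric_limits<float>::min())
      return true; // zero or denormal lane: take the full-sqrt path
  return false;
}

int main() {
  std::printf("%d\n", anyLaneUnsafe({1.0f, 2.0f, 3.0f, 4.0f})); // 0: estimate
  std::printf("%d\n", anyLaneUnsafe({1.0f, 0.0f, 3.0f, 4.0f})); // 1: xvsqrtsp
}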
[llvm-branch-commits] [llvm] c25b039 - [PowerPC] Fix the regression caused by commit 9c588f53fc42
Author: QingShan Zhang Date: 2020-12-04T10:22:13Z New Revision: c25b039e211441033069c7046324d2f76de37bed URL: https://github.com/llvm/llvm-project/commit/c25b039e211441033069c7046324d2f76de37bed DIFF: https://github.com/llvm/llvm-project/commit/c25b039e211441033069c7046324d2f76de37bed.diff LOG: [PowerPC] Fix the regression caused by commit 9c588f53fc42 Add a TypeLegal check for MVT::i1 and add the test. Added: Modified: llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/test/CodeGen/PowerPC/recipest.ll Removed: diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 101ef686c1805..c5dbacde6fa5b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -12762,8 +12762,9 @@ SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, const DenormalMode &Mode) const { // We only have VSX Vector Test for software Square Root. EVT VT = Op.getValueType(); - if (VT != MVT::f64 && - ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())) + if (!isTypeLegal(MVT::i1) || + (VT != MVT::f64 && + ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX( return SDValue(); SDLoc DL(Op); diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll index 46da4cc6c4719..aad0bb47772ea 100644 --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -821,6 +821,90 @@ define double @foo3_fmf(double %a) nounwind { ret double %r } +define double @foo3_fmf_crbits_off(double %a) #2 { +; CHECK-P7-LABEL: foo3_fmf_crbits_off: +; CHECK-P7: # %bb.0: +; CHECK-P7-NEXT:fabs 0, 1 +; CHECK-P7-NEXT:addis 3, 2, .LCPI21_2@toc@ha +; CHECK-P7-NEXT:lfd 2, .LCPI21_2@toc@l(3) +; CHECK-P7-NEXT:fcmpu 0, 0, 2 +; CHECK-P7-NEXT:blt 0, .LBB21_2 +; CHECK-P7-NEXT: # %bb.1: +; CHECK-P7-NEXT:frsqrte 0, 1 +; CHECK-P7-NEXT:addis 3, 2, .LCPI21_0@toc@ha +; CHECK-P7-NEXT:addis 4, 2, .LCPI21_1@toc@ha +; CHECK-P7-NEXT:lfs 3, .LCPI21_0@toc@l(3) +; CHECK-P7-NEXT:lfs 4, .LCPI21_1@toc@l(4) +; CHECK-P7-NEXT:fmul 2, 1, 0 +; CHECK-P7-NEXT:fmadd 2, 2, 0, 3 +; CHECK-P7-NEXT:fmul 0, 0, 4 +; CHECK-P7-NEXT:fmul 0, 0, 2 +; CHECK-P7-NEXT:fmul 1, 1, 0 +; CHECK-P7-NEXT:fmadd 0, 1, 0, 3 +; CHECK-P7-NEXT:fmul 1, 1, 4 +; CHECK-P7-NEXT:fmul 1, 1, 0 +; CHECK-P7-NEXT:blr +; CHECK-P7-NEXT: .LBB21_2: +; CHECK-P7-NEXT:fsqrt 1, 1 +; CHECK-P7-NEXT:blr +; +; CHECK-P8-LABEL: foo3_fmf_crbits_off: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT:xsabsdp 0, 1 +; CHECK-P8-NEXT:addis 3, 2, .LCPI21_2@toc@ha +; CHECK-P8-NEXT:lfd 2, .LCPI21_2@toc@l(3) +; CHECK-P8-NEXT:xscmpudp 0, 0, 2 +; CHECK-P8-NEXT:blt 0, .LBB21_2 +; CHECK-P8-NEXT: # %bb.1: +; CHECK-P8-NEXT:xsrsqrtedp 0, 1 +; CHECK-P8-NEXT:addis 3, 2, .LCPI21_0@toc@ha +; CHECK-P8-NEXT:lfs 3, .LCPI21_0@toc@l(3) +; CHECK-P8-NEXT:addis 3, 2, .LCPI21_1@toc@ha +; CHECK-P8-NEXT:lfs 4, .LCPI21_1@toc@l(3) +; CHECK-P8-NEXT:fmr 5, 3 +; CHECK-P8-NEXT:xsmuldp 2, 1, 0 +; CHECK-P8-NEXT:xsmaddadp 5, 2, 0 +; CHECK-P8-NEXT:xsmuldp 0, 0, 4 +; CHECK-P8-NEXT:xsmuldp 0, 0, 5 +; CHECK-P8-NEXT:xsmuldp 1, 1, 0 +; CHECK-P8-NEXT:xsmaddadp 3, 1, 0 +; CHECK-P8-NEXT:xsmuldp 0, 1, 4 +; CHECK-P8-NEXT:xsmuldp 1, 0, 3 +; CHECK-P8-NEXT:blr +; CHECK-P8-NEXT: .LBB21_2: +; CHECK-P8-NEXT:xssqrtdp 1, 1 +; CHECK-P8-NEXT:blr +; +; CHECK-P9-LABEL: foo3_fmf_crbits_off: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT:addis 3, 2, .LCPI21_2@toc@ha +; CHECK-P9-NEXT:xsabsdp 0, 1 +; CHECK-P9-NEXT:lfd 2, .LCPI21_2@toc@l(3) +; CHECK-P9-NEXT:xscmpudp 0, 0, 2 +; CHECK-P9-NEXT:blt 0, .LBB21_2 +; CHECK-P9-NEXT: # 
%bb.1: +; CHECK-P9-NEXT:xsrsqrtedp 0, 1 +; CHECK-P9-NEXT:addis 3, 2, .LCPI21_0@toc@ha +; CHECK-P9-NEXT:lfs 3, .LCPI21_0@toc@l(3) +; CHECK-P9-NEXT:addis 3, 2, .LCPI21_1@toc@ha +; CHECK-P9-NEXT:xsmuldp 2, 1, 0 +; CHECK-P9-NEXT:fmr 4, 3 +; CHECK-P9-NEXT:xsmaddadp 4, 2, 0 +; CHECK-P9-NEXT:lfs 2, .LCPI21_1@toc@l(3) +; CHECK-P9-NEXT:xsmuldp 0, 0, 2 +; CHECK-P9-NEXT:xsmuldp 0, 0, 4 +; CHECK-P9-NEXT:xsmuldp 1, 1, 0 +; CHECK-P9-NEXT:xsmaddadp 3, 1, 0 +; CHECK-P9-NEXT:xsmuldp 0, 1, 2 +; CHECK-P9-NEXT:xsmuldp 1, 0, 3 +; CHECK-P9-NEXT:blr +; CHECK-P9-NEXT: .LBB21_2: +; CHECK-P9-NEXT:xssqrtdp 1, 1 +; CHECK-P9-NEXT:blr + %r = call reassoc ninf afn double @llvm.sqrt.f64(double %a) + ret double %r +} + define double @foo3_safe(double %a) nounwind { ; CHECK-P7-LABEL: foo3_safe: ; CHECK-P7: # %bb.0: @@ -844,67 +928,67 @@ define float @goo3_fmf(float %a) nounwind { ; CHECK-P7-LABEL: goo3_fmf: ; CHECK-P7: # %bb.0: ; CHECK-P7-NEXT:fabs 0,
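The foo3_fmf_crbits_off checks above exercise the software sqrt expansion when CR bits are disabled: the fcmpu/xscmpudp + blt guard branches to a plain fsqrt/xssqrtdp for small inputs, and the fast path refines the hardware reciprocal-sqrt estimate with two Newton-Raphson steps. A minimal C++ sketch of that refinement, illustrative only; the -0.5/-3.0 constant pair and the stand-in estimate are assumptions matching the usual two-constant iteration, not code taken from the patch:

#include <cmath>
#include <cstdio>

// Refine a coarse rsqrt estimate E for input A with the iteration
//   E' = (E * -0.5) * (A * E * E + -3.0)
// which is what each fmul/fmadd/fmul/fmul group in the checks computes,
// then recover sqrt(A) as A * E. The real expansion additionally guards
// zero/denormal inputs (the fcmpu + blt to fsqrt above), because the
// hardware estimate is meaningless for them.
static double sqrtViaRsqrt(double A, double Est, int Iterations) {
  for (int I = 0; I < Iterations; ++I)
    Est = (Est * -0.5) * (A * Est * Est + -3.0);
  return A * Est;
}

int main() {
  double A = 2.0;
  // Crude stand-in for frsqrte's low-precision hardware estimate.
  double Est = 1.0 / std::sqrt(A) * 1.03;
  std::printf("refined %.17g vs libm %.17g\n", sqrtViaRsqrt(A, Est, 2),
              std::sqrt(A));
  return 0;
}

Two iterations suffice for double precision because each Newton-Raphson step roughly doubles the number of correct bits of the initial estimate.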
[llvm-branch-commits] [llvm] 08280c4 - [NFC][Test] Format the PowerPC test for incoming patch
Author: QingShan Zhang Date: 2020-12-11T09:53:20Z New Revision: 08280c4b73439e5f99000c89a818f66343e87aa6 URL: https://github.com/llvm/llvm-project/commit/08280c4b73439e5f99000c89a818f66343e87aa6 DIFF: https://github.com/llvm/llvm-project/commit/08280c4b73439e5f99000c89a818f66343e87aa6.diff LOG: [NFC][Test] Format the PowerPC test for incoming patch Added: Modified: llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll llvm/test/CodeGen/PowerPC/unal4-std.ll llvm/test/CodeGen/PowerPC/unaligned.ll Removed: diff --git a/llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll b/llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll index b672eef8740af..d6ed3dcf41b0a 100644 --- a/llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll +++ b/llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll @@ -1,16 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ ; RUN: -verify-machineinstrs -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s @best8x8mode = external dso_local local_unnamed_addr global [4 x i16], align 2 define dso_local void @AlignDSForm() local_unnamed_addr { +; CHECK-LABEL: AlignDSForm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:addis r3, r2, best8x8mode@toc@ha +; CHECK-NEXT:addi r3, r3, best8x8mode@toc@l +; CHECK-NEXT:ldx r3, 0, r3 +; CHECK-NEXT:std r3, 0(r3) entry: %0 = load <4 x i16>, <4 x i16>* bitcast ([4 x i16]* @best8x8mode to <4 x i16>*), align 2 store <4 x i16> %0, <4 x i16>* undef, align 4 unreachable -; CHECK-LABEL: AlignDSForm -; CHECK: addis r{{[0-9]+}}, r{{[0-9]+}}, best8x8mode@toc@ha -; CHECK: addi r[[REG:[0-9]+]], r{{[0-9]+}}, best8x8mode@toc@l -; CHECK: ldx r{{[0-9]+}}, 0, r[[REG]] } diff --git a/llvm/test/CodeGen/PowerPC/unal4-std.ll b/llvm/test/CodeGen/PowerPC/unal4-std.ll index f843b6b58c1ee..038ede0ba92ee 100644 --- a/llvm/test/CodeGen/PowerPC/unal4-std.ll +++ b/llvm/test/CodeGen/PowerPC/unal4-std.ll @@ -1,9 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=-vsx| FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" define void @copy_to_conceal(<8 x i16>* %inp) #0 { +; CHECK-LABEL: copy_to_conceal: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:vxor 2, 2, 2 +; CHECK-NEXT:addi 4, 1, -16 +; CHECK-NEXT:stvx 2, 0, 4 +; CHECK-NEXT:ld 4, -8(1) +; CHECK-NEXT:std 4, 8(3) +; CHECK-NEXT:ld 4, -16(1) +; CHECK-NEXT:stdx 4, 0, 3 +; CHECK-NEXT:blr +; +; CHECK-VSX-LABEL: copy_to_conceal: +; CHECK-VSX: # %bb.0: # %entry +; CHECK-VSX-NEXT:xxlxor 0, 0, 0 +; CHECK-VSX-NEXT:stxvw4x 0, 0, 3 +; CHECK-VSX-NEXT:blr entry: store <8 x i16> zeroinitializer, <8 x i16>* %inp, align 2 br label %if.end210 @@ -14,11 +31,7 @@ if.end210:; preds = %entry ; This will generate two align-1 i64 stores. Make sure that they are ; indexed stores and not in r+i form (which require the offset to be ; a multiple of 4). 
-; CHECK: @copy_to_conceal -; CHECK: stdx {{[0-9]+}}, 0, -; CHECK-VSX: @copy_to_conceal -; CHECK-VSX: stxvw4x {{[0-9]+}}, 0, } attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/CodeGen/PowerPC/unaligned.ll b/llvm/test/CodeGen/PowerPC/unaligned.ll index bd518342f3ec9..977c470e668e2 100644 --- a/llvm/test/CodeGen/PowerPC/unaligned.ll +++ b/llvm/test/CodeGen/PowerPC/unaligned.ll @@ -1,105 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" define void @foo1(i16* %p, i16* %r) nounwind { +; CHECK-LABEL: foo1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:lhz 3, 0(3) +; CHECK-NEXT:sth 3, 0(4) +; CHECK-NEXT:blr +; +; CHECK-VSX-LABEL: foo1: +; CHECK-VSX: # %bb.0: # %entry +; CHECK-VSX-NEXT:lhz 3, 0(3) +; CHECK-VSX-NEXT:sth 3, 0(4) +; CHECK-VSX
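The comment kept in unal4-std.ll states the underlying encoding rule: std/ld are DS-form instructions whose displacement field is implicitly scaled by 4, so an r+i access is only encodable when the offset is a multiple of 4, and the align-1 i64 stores must use the indexed stdx form instead. A small C++ sketch of that encodability check, an assumed simplification rather than the backend's actual predicate:

#include <cstdint>

// DS-form (e.g. std/ld) encodes a 14-bit signed displacement scaled by 4,
// giving a reach of [-32768, 32764] in steps of 4. A displacement that is
// not a multiple of 4 cannot be encoded, forcing the X-form (stdx/ldx),
// where the offset lives in a register instead.
static bool fitsDSForm(int64_t Disp) {
  return (Disp & 3) == 0 && Disp >= -32768 && Disp <= 32764;
}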
[llvm-branch-commits] [llvm] 68dbb77 - [NFC][Test] Add a test to verify the instruction form we got from isel
Author: QingShan Zhang Date: 2020-12-11T10:36:46Z New Revision: 68dbb7789e5388657420afcbdd0f928e4fdfcfb8 URL: https://github.com/llvm/llvm-project/commit/68dbb7789e5388657420afcbdd0f928e4fdfcfb8 DIFF: https://github.com/llvm/llvm-project/commit/68dbb7789e5388657420afcbdd0f928e4fdfcfb8.diff LOG: [NFC][Test] Add a test to verify the instruction form we got from isel Added: llvm/test/CodeGen/PowerPC/ldst-align.ll Modified: Removed: diff --git a/llvm/test/CodeGen/PowerPC/ldst-align.ll b/llvm/test/CodeGen/PowerPC/ldst-align.ll new file mode 100644 index ..129f28191ec7 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ldst-align.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: -stop-after=finalize-isel -verify-machineinstrs | FileCheck %s +define i64 @load(i64* %p) { + ; CHECK-LABEL: name: load + ; CHECK: bb.0.entry: + ; CHECK: liveins: $x3 + ; CHECK: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3 + ; CHECK: [[ADDI8_:%[0-9]+]]:g8rc = nuw ADDI8 [[COPY]], 24 + ; CHECK: [[LDX:%[0-9]+]]:g8rc = LDX $zero8, killed [[ADDI8_]] :: (load 8 from %ir.arrayidx, align 2) + ; CHECK: $x3 = COPY [[LDX]] + ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit $x3 +entry: + %arrayidx = getelementptr inbounds i64, i64* %p, i64 3 + %0 = load i64, i64* %arrayidx, align 2 + ret i64 %0 +} + +define void @store(i64* %p) { + ; CHECK-LABEL: name: store + ; CHECK: bb.0.entry: + ; CHECK: liveins: $x3 + ; CHECK: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3 + ; CHECK: [[ADDI8_:%[0-9]+]]:g8rc = nuw ADDI8 [[COPY]], 16 + ; CHECK: [[LI8_:%[0-9]+]]:g8rc = LI8 9 + ; CHECK: STDX killed [[LI8_]], $zero8, killed [[ADDI8_]] :: (store 8 into %ir.arrayidx, align 1) + ; CHECK: BLR8 implicit $lr8, implicit $rm +entry: + %arrayidx = getelementptr inbounds i64, i64* %p, i64 2 + store i64 9, i64* %arrayidx, align 1 + ret void +} + +define void @store_aligned(i64* %p) { + ; CHECK-LABEL: name: store_aligned + ; CHECK: bb.0.entry: + ; CHECK: liveins: $x3 + ; CHECK: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3 + ; CHECK: [[LI8_:%[0-9]+]]:g8rc = LI8 9 + ; CHECK: STD killed [[LI8_]], 16, [[COPY]] :: (store 8 into %ir.arrayidx, align 4) + ; CHECK: BLR8 implicit $lr8, implicit $rm +entry: + %arrayidx = getelementptr inbounds i64, i64* %p, i64 2 + store i64 9, i64* %arrayidx, align 4 + ret void +}
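The three functions pin down when isel keeps the immediate form: even with a displacement that is itself a multiple of 4, an align-1 or align-2 access is still emitted as LDX/STDX off an ADDI8, and only the align-4 store uses STD. A sketch of the observed gate, inferred from these MIR checks rather than copied from the backend, extending the encoding check sketched earlier:

#include <cstdint>

// Behavior the MIR checks above capture: besides the multiple-of-4
// displacement required by the DS encoding, the access's known alignment
// must be at least 4 bytes for the r+i form; otherwise the constant
// offset is materialized with ADDI8 and an indexed load/store is used.
static bool useDSForm(int64_t Disp, uint64_t AlignInBytes) {
  return AlignInBytes >= 4 && (Disp & 3) == 0 &&
         Disp >= -32768 && Disp <= 32764;
}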
[llvm-branch-commits] [llvm] 08e287a - [PowerPC][FP128] Fix the incorrect signature for math library call
Author: QingShan Zhang Date: 2020-12-14T07:52:56Z New Revision: 08e287aaf39f3ab8ccfcd4535fafa1c5d99ffdf7 URL: https://github.com/llvm/llvm-project/commit/08e287aaf39f3ab8ccfcd4535fafa1c5d99ffdf7 DIFF: https://github.com/llvm/llvm-project/commit/08e287aaf39f3ab8ccfcd4535fafa1c5d99ffdf7.diff LOG: [PowerPC][FP128] Fix the incorrect signature for math library call The runtime library has two families of library implementations, one for ppc_fp128 and one for fp128. For IBM long double (ppc_fp128), the calls are suffixed with 'l', e.g. sqrtl. For IEEE long double (fp128), they are suffixed with "ieee128" or "f128". We were missing the mappings for several libcalls for IEEE long double. Reviewed By: qiucf Differential Revision: https://reviews.llvm.org/D91675 Added: Modified: llvm/lib/CodeGen/TargetLoweringBase.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/test/CodeGen/PowerPC/f128-arith.ll llvm/test/CodeGen/PowerPC/f128-conv.ll llvm/test/CodeGen/PowerPC/f128-rounding.ll llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll llvm/test/CodeGen/PowerPC/fp-strict-f128.ll llvm/test/CodeGen/PowerPC/recipest.ll Removed: diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 58543b48a994..553434cdd5fa 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -140,18 +140,23 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::SUB_F128, "__subkf3"); setLibcallName(RTLIB::MUL_F128, "__mulkf3"); setLibcallName(RTLIB::DIV_F128, "__divkf3"); +setLibcallName(RTLIB::POWI_F128, "__powikf2"); setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2"); setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2"); setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2"); setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2"); setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi"); setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi"); +setLibcallName(RTLIB::FPTOSINT_F128_I128, "__fixkfti"); setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi"); setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi"); +setLibcallName(RTLIB::FPTOUINT_F128_I128, "__fixunskfti"); setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf"); setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf"); +setLibcallName(RTLIB::SINTTOFP_I128_F128, "__floattikf"); setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf"); setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf"); +setLibcallName(RTLIB::UINTTOFP_I128_F128, "__floatuntikf"); setLibcallName(RTLIB::OEQ_F128, "__eqkf2"); setLibcallName(RTLIB::UNE_F128, "__nekf2"); setLibcallName(RTLIB::OGE_F128, "__gekf2"); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 10cf7d7f5e02..a98d99af552c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1308,8 +1308,19 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLibcallName(RTLIB::POW_F128, "powf128"); setLibcallName(RTLIB::FMIN_F128, "fminf128"); setLibcallName(RTLIB::FMAX_F128, "fmaxf128"); - setLibcallName(RTLIB::POWI_F128, "__powikf2"); setLibcallName(RTLIB::REM_F128, "fmodf128"); + setLibcallName(RTLIB::SQRT_F128, "sqrtf128"); + setLibcallName(RTLIB::CEIL_F128, "ceilf128"); + setLibcallName(RTLIB::FLOOR_F128, "floorf128"); + setLibcallName(RTLIB::TRUNC_F128, "truncf128"); + setLibcallName(RTLIB::ROUND_F128, "roundf128"); + setLibcallName(RTLIB::LROUND_F128, "lroundf128"); + setLibcallName(RTLIB::LLROUND_F128, "llroundf128"); +
setLibcallName(RTLIB::RINT_F128, "rintf128"); + setLibcallName(RTLIB::LRINT_F128, "lrintf128"); + setLibcallName(RTLIB::LLRINT_F128, "llrintf128"); + setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128"); + setLibcallName(RTLIB::FMA_F128, "fmaf128"); // With 32 condition bits, we don't need to sink (and duplicate) compares // aggressively in CodeGenPrep. diff --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll index 61bd03aa6368..587cf32a70e6 100644 --- a/llvm/test/CodeGen/PowerPC/f128-arith.ll +++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll @@ -195,7 +195,7 @@ define void @qpSqrt(fp128* nocapture readonly %a, fp128* nocapture %res) { ; CHECK-P8-NEXT:stdu r1, -48(r1) ; CHECK-P8-NEXT:lvx v2, 0, r3 ; CHECK-P8-NEXT:mr r30, r4 -; CHECK-P8-NEXT:bl sqrtl +; CHECK-P8-NEXT:bl sqrtf128 ; CHECK-P8-NEXT:nop ; CHECK-P8-NEXT:stvx v2, 0, r30 ; CHECK-P8-NEXT:addi r1, r1, 48 @@ -840,7 +840,7 @@ define void @qpCeil(fp128* nocapture readonly %a, fp128* nocapture %res) { ; CHECK-P8-NEXT:stdu r1, -48(r1) ; CHECK-P8-NEXT:lvx v2, 0, r3 ; CHECK-P8-NEXT:mr r30, r4 -; CHECK-P8-NEXT:bl c
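The renamings visible in the test updates (bl sqrtl becoming bl sqrtf128, and likewise for the rounding routines) all follow one rule: on PowerPC the 'l'-suffixed libm entry points implement IBM double-double semantics, so when the operand is IEEE quad every FP128 libcall must be pointed at the f128-suffixed routine instead. A toy C++ sketch of that naming rule; the helper is hypothetical, not an LLVM API, and the symbol names come from the diff above:

#include <cstdio>
#include <string>

// Pick the runtime symbol for a long double math routine on PowerPC:
// IEEE quad (fp128) takes the *f128 names wired up in this patch, while
// IBM long double (ppc_fp128) keeps the C 'l' suffix.
static std::string fp128Libcall(const std::string &Base, bool IsIEEEQuad) {
  return Base + (IsIEEEQuad ? "f128" : "l");
}

int main() {
  std::printf("%s / %s\n", fp128Libcall("sqrt", true).c_str(),
              fp128Libcall("sqrt", false).c_str()); // sqrtf128 / sqrtl
  return 0;
}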