[llvm-branch-commits] [llvm] ffc3e80 - [NFC] [DAGCombine] Correct the result for sqrt even the iteration is zero

2021-01-24 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2021-01-25T04:02:44Z
New Revision: ffc3e800c65ee58166255ff897f8b7e6d850ddda

URL: 
https://github.com/llvm/llvm-project/commit/ffc3e800c65ee58166255ff897f8b7e6d850ddda
DIFF: 
https://github.com/llvm/llvm-project/commit/ffc3e800c65ee58166255ff897f8b7e6d850ddda.diff

LOG: [NFC] [DAGCombine] Correct the result for sqrt even the iteration is zero

For now, we correct the result for sqrt only when the iteration count is greater
than zero. This doesn't make sense, as the two are not strictly related.

Reviewed By: dmgreen, spatel, RKSimon

Differential Revision: https://reviews.llvm.org/D94480

Added: 


Modified: 
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 




diff  --git a/llvm/include/llvm/CodeGen/TargetLowering.h 
b/llvm/include/llvm/CodeGen/TargetLowering.h
index 5a237074a5a3..1bc5377e6863 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4287,9 +4287,7 @@ class TargetLowering : public TargetLoweringBase {
   /// comparison may check if the operand is NAN, INF, zero, normal, etc. The
   /// result should be used as the condition operand for a select or branch.
   virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
-   const DenormalMode &Mode) const {
-return SDValue();
-  }
+   const DenormalMode &Mode) const;
 
   /// Return a target-dependent result if the input operand is not suitable for
   /// use with a square root estimate calculation.

diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2ebf7c6ba0f3..cb273a6f299c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22275,43 +22275,21 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue 
Op, SDNodeFlags Flags,
   Reciprocal)) {
 AddToWorklist(Est.getNode());
 
-if (Iterations) {
+if (Iterations)
   Est = UseOneConstNR
 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
-
-  if (!Reciprocal) {
-SDLoc DL(Op);
-EVT CCVT = getSetCCResultType(VT);
-SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
-DenormalMode DenormMode = DAG.getDenormalMode(VT);
-// Try the target specific test first.
-SDValue Test = TLI.getSqrtInputTest(Op, DAG, DenormMode);
-if (!Test) {
-  // If no test provided by target, testing it with denormal inputs to
-  // avoid wrong estimate.
-  if (DenormMode.Input == DenormalMode::IEEE) {
-// This is specifically a check for the handling of denormal 
inputs,
-// not the result.
-
-// Test = fabs(X) < SmallestNormal
-const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
-APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
-SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
-SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
-Test = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
-  } else
-// Test = X == 0.0
-Test = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
-}
-
-// The estimate is now completely wrong if the input was exactly 0.0 or
-// possibly a denormal. Force the answer to 0.0 or value provided by
-// target for those cases.
-Est = DAG.getNode(
-Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, 
VT,
-Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
-  }
+if (!Reciprocal) {
+  SDLoc DL(Op);
+  // Try the target specific test first.
+  SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
+
+  // The estimate is now completely wrong if the input was exactly 0.0 or
+  // possibly a denormal. Force the answer to 0.0 or value provided by
+  // target for those cases.
+  Est = DAG.getNode(
+  Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+  Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
 }
 return Est;
   }

diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 
b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 80b745e0354a..7858bc6c43e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5841,6 +5841,28 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, 
SelectionDAG &DAG) const {
   retu

[llvm-branch-commits] [llvm] 2962f11 - [NFC] Add the getSizeInBytes() interface for MachineConstantPoolValue

2021-01-04 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2021-01-05T03:22:45Z
New Revision: 2962f1149c8fccf8e865654ce11b3f1312165651

URL: 
https://github.com/llvm/llvm-project/commit/2962f1149c8fccf8e865654ce11b3f1312165651
DIFF: 
https://github.com/llvm/llvm-project/commit/2962f1149c8fccf8e865654ce11b3f1312165651.diff

LOG: [NFC] Add the getSizeInBytes() interface for MachineConstantPoolValue

The current implementation assumes that each MachineConstantPoolValue takes
up sizeof(MachineConstantPoolValue::Ty) bytes. For PowerPC, we want to
lump all the constants with the same type into one MachineConstantPoolValue
to save the cost of calculating the TOC entry for each constant. So, we need
to extend MachineConstantPoolValue in a way that breaks this assumption.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D89108

Added: 


Modified: 
llvm/include/llvm/CodeGen/MachineConstantPool.h
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
llvm/lib/CodeGen/MachineFunction.cpp
llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
llvm/lib/Target/X86/X86MCInstLower.cpp

Removed: 




diff  --git a/llvm/include/llvm/CodeGen/MachineConstantPool.h 
b/llvm/include/llvm/CodeGen/MachineConstantPool.h
index cfc9ca88c976..a9bc0ce300b2 100644
--- a/llvm/include/llvm/CodeGen/MachineConstantPool.h
+++ b/llvm/include/llvm/CodeGen/MachineConstantPool.h
@@ -41,10 +41,10 @@ class MachineConstantPoolValue {
   explicit MachineConstantPoolValue(Type *ty) : Ty(ty) {}
   virtual ~MachineConstantPoolValue() = default;
 
-  /// getType - get type of this MachineConstantPoolValue.
-  ///
   Type *getType() const { return Ty; }
 
+  virtual unsigned getSizeInBytes(const DataLayout &DL) const;
+
   virtual int getExistingMachineCPValue(MachineConstantPool *CP,
 Align Alignment) = 0;
 
@@ -94,7 +94,7 @@ class MachineConstantPoolEntry {
 
   Align getAlign() const { return Alignment; }
 
-  Type *getType() const;
+  unsigned getSizeInBytes(const DataLayout &DL) const;
 
   /// This method classifies the entry according to whether or not it may
   /// generate a relocation entry.  This must be conservative, so if it might

diff  --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp 
b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 6732c35e2094..85a5d0c59b83 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1970,8 +1970,7 @@ void AsmPrinter::emitConstantPool() {
   unsigned NewOffset = alignTo(Offset, CPE.getAlign());
   OutStreamer->emitZeros(NewOffset - Offset);
 
-  Type *Ty = CPE.getType();
-  Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty);
+  Offset = NewOffset + CPE.getSizeInBytes(getDataLayout());
 
   OutStreamer->emitLabel(Sym);
   if (CPE.isMachineConstantPoolEntry())

diff  --git a/llvm/lib/CodeGen/MachineFunction.cpp 
b/llvm/lib/CodeGen/MachineFunction.cpp
index 1eb191465ac9..3f44578b1a2c 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -1107,10 +1107,14 @@ Printable llvm::printJumpTableEntryReference(unsigned 
Idx) {
 
 void MachineConstantPoolValue::anchor() {}
 
-Type *MachineConstantPoolEntry::getType() const {
+unsigned MachineConstantPoolValue::getSizeInBytes(const DataLayout &DL) const {
+  return DL.getTypeAllocSize(Ty);
+}
+
+unsigned MachineConstantPoolEntry::getSizeInBytes(const DataLayout &DL) const {
   if (isMachineConstantPoolEntry())
-return Val.MachineCPVal->getType();
-  return Val.ConstVal->getType();
+return Val.MachineCPVal->getSizeInBytes(DL);
+  return DL.getTypeAllocSize(Val.ConstVal->getType());
 }
 
 bool MachineConstantPoolEntry::needsRelocation() const {
@@ -1123,7 +1127,7 @@ SectionKind
 MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
   if (needsRelocation())
 return SectionKind::getReadOnlyWithRel();
-  switch (DL->getTypeAllocSize(getType())) {
+  switch (getSizeInBytes(*DL)) {
   case 4:
 return SectionKind::getMergeableConst4();
   case 8:

diff  --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp 
b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index da7bf6170255..886bc2965969 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -514,7 +514,7 @@ 
ARMConstantIslands::doInitialConstPlacement(std::vector &CPEMIs)
 
   const DataLayout &TD = MF->getDataLayout();
   for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
-unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+unsigned Size = CPs[i].getSizeInBytes(TD);
 Align Alignment = CPs[i].getAlign();
 // Verify that all constant pool entries are a multiple of their alignment.
 // If not, we would have to pad them out so that instructions stay aligned.

diff  --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp 
b/llvm/lib

[llvm-branch-commits] [llvm] 7539c75 - [DAGCombine] Remove the check for unsafe-fp-math when we are checking the AFN

2021-01-10 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2021-01-11T02:25:53Z
New Revision: 7539c75bb438f185575573ed4ea8da7cb37d3f2a

URL: 
https://github.com/llvm/llvm-project/commit/7539c75bb438f185575573ed4ea8da7cb37d3f2a
DIFF: 
https://github.com/llvm/llvm-project/commit/7539c75bb438f185575573ed4ea8da7cb37d3f2a.diff

LOG: [DAGCombine] Remove the check for unsafe-fp-math when we are checking the 
AFN

We are checking unsafe-fp-math for sqrt but not for fpow, which is
inconsistent. As the direction is to remove this global option, we need to
remove the unsafe-fp-math check for sqrt and update the tests with the afn
fast-math flag.

Reviewed By: Spatel

Differential Revision: https://reviews.llvm.org/D93891

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/fneg-combines.ll
llvm/test/CodeGen/AMDGPU/frem.ll
llvm/test/CodeGen/NVPTX/fast-math.ll
llvm/test/CodeGen/NVPTX/sqrt-approx.ll
llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
llvm/test/CodeGen/X86/sqrt-fastmath.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 09c8f7219390..be57d9250db7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13918,7 +13918,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
 
   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
-  if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
+  if (!Flags.hasApproximateFuncs() ||
   (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
 return SDValue();
 

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 23e817eb51cb..e68b4e6c2cd6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8172,8 +8172,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
   EVT VT = Op.getValueType();
   const SDNodeFlags Flags = Op->getFlags();
 
-  bool AllowInaccurateRcp = DAG.getTarget().Options.UnsafeFPMath ||
-Flags.hasApproximateFuncs();
+  bool AllowInaccurateRcp = Flags.hasApproximateFuncs();
 
   // Without !fpmath accuracy information, we can't do more because we don't
   // know exactly whether rcp is accurate enough to meet !fpmath requirement.

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll 
b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index de10dae7ee9f..eaf4232335ec 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -252,7 +252,7 @@ define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, 
float inreg %tmp6, <4 x i
 ; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]]
 define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, 
<4 x i32> %arg) local_unnamed_addr #2 {
 .entry:
-  %tmp7 = fdiv float 1.00e+00, %tmp6
+  %tmp7 = fdiv afn float 1.00e+00, %tmp6
   %tmp8 = fmul float 0.00e+00, %tmp7
   %tmp9 = fmul reassoc nnan arcp contract float 0.00e+00, %tmp8
   %.i188 = fadd float %tmp9, 0.00e+00

diff  --git a/llvm/test/CodeGen/AMDGPU/frem.ll 
b/llvm/test/CodeGen/AMDGPU/frem.ll
index ef19917cc45f..46974c2f38d3 100644
--- a/llvm/test/CodeGen/AMDGPU/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/frem.ll
@@ -297,7 +297,7 @@ define amdgpu_kernel void @unsafe_frem_f16(half 
addrspace(1)* %out, half addrspa
%gep2 = getelementptr half, half addrspace(1)* %in2, i32 4
%r0 = load half, half addrspace(1)* %in1, align 4
%r1 = load half, half addrspace(1)* %gep2, align 4
-   %r2 = frem half %r0, %r1
+   %r2 = frem afn half %r0, %r1
store half %r2, half addrspace(1)* %out, align 4
ret void
 }
@@ -576,7 +576,7 @@ define amdgpu_kernel void @unsafe_frem_f32(float 
addrspace(1)* %out, float addrs
%gep2 = getelementptr float, float addrspace(1)* %in2, i32 4
%r0 = load float, float addrspace(1)* %in1, align 4
%r1 = load float, float addrspace(1)* %gep2, align 4
-   %r2 = frem float %r0, %r1
+   %r2 = frem afn float %r0, %r1
store float %r2, float addrspace(1)* %out, align 4
ret void
 }
@@ -924,7 +924,7 @@ define amdgpu_kernel void @unsafe_frem_f64(double 
addrspace(1)* %out, double add
  double addrspace(1)* %in2) #1 {
%r0 = load double, double addrspace(1)* %in1, align 8
%r1 = load double, double addrspace(1)* %in2, align 8
-   %r2 = frem double %r0, %r1
+   %r2 = frem afn double %r0, %r1
store double %r2, double addrspace(1)* %out, align 8
ret void
 }

diff  --git a/llvm/test/CodeGen/NVPTX/fast-math.ll 
b/llvm/test/CodeGen/NVPTX/fast-math.ll
index db5fb63f4e76..1f300fecb131 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -25,7 +25,7 @@ def

[llvm-branch-commits] [llvm] ebdd20f - Expand the fp_to_int/int_to_fp/fp_round/fp_extend as libcall for fp128

2020-12-17 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-12-17T07:59:30Z
New Revision: ebdd20f430c408e200d5c60ef957e777841f0fa3

URL: 
https://github.com/llvm/llvm-project/commit/ebdd20f430c408e200d5c60ef957e777841f0fa3
DIFF: 
https://github.com/llvm/llvm-project/commit/ebdd20f430c408e200d5c60ef957e777841f0fa3.diff

LOG: Expand the fp_to_int/int_to_fp/fp_round/fp_extend as libcall for fp128

X86 and AArch64 expand it as a libcall inside the target. PowerPC also wants
to expand them as a libcall for P8. So, this proposes an implementation in the
legalizer to common up the logic, and removes the target code from X86/AArch64
to avoid the duplication.

Reviewed By: Craig Topper

Differential Revision: https://reviews.llvm.org/D91331

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/AArch64/arm64-fp128.ll
llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
llvm/test/CodeGen/PowerPC/f128-conv.ll
llvm/test/CodeGen/PowerPC/f128-rounding.ll
llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
llvm/test/CodeGen/X86/fp128-load.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7342c663776c..ef151a60a35c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1812,6 +1812,19 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue 
SrcOp, EVT SlotVT,
 SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
EVT DestVT, const SDLoc &dl,
SDValue Chain) {
+  unsigned SrcSize = SrcOp.getValueSizeInBits();
+  unsigned SlotSize = SlotVT.getSizeInBits();
+  unsigned DestSize = DestVT.getSizeInBits();
+  Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+  Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType);
+
+  // Don't convert with stack if the load/store is expensive.
+  if ((SrcSize > SlotSize &&
+   !TLI.isTruncStoreLegalOrCustom(SrcOp.getValueType(), SlotVT)) ||
+  (SlotSize < DestSize &&
+   !TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, DestVT, SlotVT)))
+return SDValue();
+
   // Create the stack frame object.
   Align SrcAlign = DAG.getDataLayout().getPrefTypeAlign(
   SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
@@ -1822,12 +1835,6 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue 
SrcOp, EVT SlotVT,
   MachinePointerInfo PtrInfo =
   MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
 
-  unsigned SrcSize = SrcOp.getValueSizeInBits();
-  unsigned SlotSize = SlotVT.getSizeInBits();
-  unsigned DestSize = DestVT.getSizeInBits();
-  Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
-  Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType);
-
   // Emit a store to the stack slot.  Use a truncstore if the input value is
   // later than DestVT.
   SDValue Store;
@@ -2415,7 +2422,11 @@ SDValue 
SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
 
   // TODO: Should any fast-math-flags be set for the created nodes?
   LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
-  if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
+  if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64) &&
+  (DestVT.bitsLE(MVT::f64) ||
+   TLI.isOperationLegal(Node->isStrictFPOpcode() ? ISD::STRICT_FP_EXTEND
+ : ISD::FP_EXTEND,
+DestVT))) {
 LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
  "expansion\n");
 
@@ -2477,8 +2488,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode 
*Node,
 }
 return Result;
   }
-  // Code below here assumes !isSigned without checking again.
-  assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+
+  if (isSigned)
+return SDValue();
 
   // TODO: Generalize this for use with other types.
   if (((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) ||
@@ -2537,6 +2549,11 @@ SDValue 
SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
 return DAG.getSelect(dl, DestVT, SignBitTest, Slow, Fast);
   }
 
+  // Don't expand it if there isn't cheap fadd.
+  if (!TLI.isOperationLegalOrCustom(
+  Node->isStrictFPOpcode() ? ISD::STRICT_FADD : ISD::FADD, DestVT))
+return SDValue();
+
   // The following op

[llvm-branch-commits] [llvm] 385e9a2 - [DAGCombiner] Improve shift by select of constant

2020-12-17 Thread QingShan Zhang via llvm-branch-commits

Author: Layton Kifer
Date: 2020-12-18T02:21:42Z
New Revision: 385e9a2a047bc0bee13a21a9016763e694a686a3

URL: 
https://github.com/llvm/llvm-project/commit/385e9a2a047bc0bee13a21a9016763e694a686a3
DIFF: 
https://github.com/llvm/llvm-project/commit/385e9a2a047bc0bee13a21a9016763e694a686a3.diff

LOG: [DAGCombiner] Improve shift by select of constant

Clean up a TODO, to support folding a shift of a constant by a
select of constants, on targets with different shift operand sizes.

Reviewed By: RKSimon, lebedev.ri

Differential Revision: https://reviews.llvm.org/D90349

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/select_const.ll
llvm/test/CodeGen/PowerPC/select_const.ll
llvm/test/CodeGen/X86/dagcombine-select.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 212e0a2ea988..74d3e1adcd6c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2150,16 +2150,7 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
   !isConstantFPBuildVectorOrConstantFP(CBO))
 return SDValue();
 
-  EVT VT = Sel.getValueType();
-
-  // In case of shift value and shift amount may have different VT. For instance
-  // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
-  // swapped operands and value types do not match. NB: x86 is fine if operands
-  // are not swapped with shift amount VT being not bigger than shifted value.
-  // TODO: that is possible to check for a shift operation, correct VTs and
-  // still perform optimization on x86 if needed.
-  if (SelOpNo && VT != CBO.getValueType())
-return SDValue();
+  EVT VT = BO->getValueType(0);
 
   // We have a select-of-constants followed by a binary operator with a
   // constant. Eliminate the binop by pulling the constant math into the 
select.

diff  --git a/llvm/test/CodeGen/AArch64/select_const.ll 
b/llvm/test/CodeGen/AArch64/select_const.ll
index 945e7cdc35ad..f58232e2ee89 100644
--- a/llvm/test/CodeGen/AArch64/select_const.ll
+++ b/llvm/test/CodeGen/AArch64/select_const.ll
@@ -437,10 +437,9 @@ define i8 @shl_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: shl_constant_sel_constants:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:tst w0, #0x1
-; CHECK-NEXT:mov w8, #2
-; CHECK-NEXT:cinc x8, x8, eq
-; CHECK-NEXT:mov w9, #1
-; CHECK-NEXT:lsl w0, w9, w8
+; CHECK-NEXT:mov w8, #8
+; CHECK-NEXT:mov w9, #4
+; CHECK-NEXT:csel w0, w9, w8, ne
 ; CHECK-NEXT:ret
   %sel = select i1 %cond, i8 2, i8 3
   %bo = shl i8 1, %sel
@@ -463,10 +462,9 @@ define i8 @lshr_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: lshr_constant_sel_constants:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:tst w0, #0x1
-; CHECK-NEXT:mov w8, #2
-; CHECK-NEXT:cinc x8, x8, eq
-; CHECK-NEXT:mov w9, #64
-; CHECK-NEXT:lsr w0, w9, w8
+; CHECK-NEXT:mov w8, #8
+; CHECK-NEXT:mov w9, #16
+; CHECK-NEXT:csel w0, w9, w8, ne
 ; CHECK-NEXT:ret
   %sel = select i1 %cond, i8 2, i8 3
   %bo = lshr i8 64, %sel
@@ -488,10 +486,9 @@ define i8 @ashr_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: ashr_constant_sel_constants:
 ; CHECK:   // %bb.0:
 ; CHECK-NEXT:tst w0, #0x1
-; CHECK-NEXT:mov w8, #2
-; CHECK-NEXT:cinc x8, x8, eq
-; CHECK-NEXT:mov w9, #-128
-; CHECK-NEXT:asr w0, w9, w8
+; CHECK-NEXT:mov w8, #-16
+; CHECK-NEXT:mov w9, #-32
+; CHECK-NEXT:csel w0, w9, w8, ne
 ; CHECK-NEXT:ret
   %sel = select i1 %cond, i8 2, i8 3
   %bo = ashr i8 128, %sel

diff  --git a/llvm/test/CodeGen/PowerPC/select_const.ll 
b/llvm/test/CodeGen/PowerPC/select_const.ll
index 7e8b6297ed3c..804cc7736bf8 100644
--- a/llvm/test/CodeGen/PowerPC/select_const.ll
+++ b/llvm/test/CodeGen/PowerPC/select_const.ll
@@ -610,13 +610,24 @@ define i8 @sel_constants_shl_constant(i1 %cond) {
 }
 
 define i8 @shl_constant_sel_constants(i1 %cond) {
-; ALL-LABEL: shl_constant_sel_constants:
-; ALL:   # %bb.0:
-; ALL-NEXT:clrlwi 3, 3, 31
-; ALL-NEXT:li 4, 1
-; ALL-NEXT:subfic 3, 3, 3
-; ALL-NEXT:slw 3, 4, 3
-; ALL-NEXT:blr
+; ISEL-LABEL: shl_constant_sel_constants:
+; ISEL:   # %bb.0:
+; ISEL-NEXT:andi. 3, 3, 1
+; ISEL-NEXT:li 4, 4
+; ISEL-NEXT:li 3, 8
+; ISEL-NEXT:iselgt 3, 4, 3
+; ISEL-NEXT:blr
+;
+; NO_ISEL-LABEL: shl_constant_sel_constants:
+; NO_ISEL:   # %bb.0:
+; NO_ISEL-NEXT:andi. 3, 3, 1
+; NO_ISEL-NEXT:li 4, 4
+; NO_ISEL-NEXT:li 3, 8
+; NO_ISEL-NEXT:bc 12, 1, .LBB37_1
+; NO_ISEL-NEXT:blr
+; NO_ISEL-NEXT:  .LBB37_1:
+; NO_ISEL-NEXT:addi 3, 4, 0
+; NO_ISEL-NEXT:blr
   %sel = select i1 %cond, i8 2, i8 3
   %bo = shl i8 1, %sel
   ret i8 %bo
@@ -647,13 +658,24 @@ define i8 @sel_constants_lshr_constant(i1 %cond) {
 }
 
 define i8 @lshr_constant_sel_constants(i1 %co

[llvm-branch-commits] [llvm] 477b650 - [PowerPC] Select the D-Form load if we know its offset meets the requirement

2020-12-17 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-12-18T07:27:26Z
New Revision: 477b6505fa1d49339c81fbbda937dc8bb5e53cfd

URL: 
https://github.com/llvm/llvm-project/commit/477b6505fa1d49339c81fbbda937dc8bb5e53cfd
DIFF: 
https://github.com/llvm/llvm-project/commit/477b6505fa1d49339c81fbbda937dc8bb5e53cfd.diff

LOG: [PowerPC] Select the D-Form load if we know its offset meets the 
requirement

The LD/STD-like instructions are selected only when the alignment in the
load/store is >= 4, to deal with the case that the offset might not be
known (i.e. relocations). That means we have to select the X-Form load
for %0 = load i64, i64* %arrayidx, align 2. In fact, we can still select
the D-Form load if the offset is known. So, we only query the load/store
alignment when we don't know if the offset is a multiple of 4.

Reviewed By: jji, Nemanjai

Differential Revision: https://reviews.llvm.org/D93099

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCInstr64Bit.td
llvm/lib/Target/PowerPC/PPCInstrInfo.td
llvm/test/CodeGen/PowerPC/ldst-align.ll
llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll
llvm/test/CodeGen/PowerPC/pr45186.ll
llvm/test/CodeGen/PowerPC/store-combine.ll
llvm/test/CodeGen/PowerPC/unal4-std.ll
llvm/test/CodeGen/PowerPC/unaligned.ll

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td 
b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 9265c513c031..e19ea6a07a0d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1062,7 +1062,7 @@ def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
 def LWA  : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
 "lwa $rD, $src", IIC_LdStLWA,
 [(set i64:$rD,
-  (aligned4sextloadi32 iaddrX4:$src))]>, isPPC64,
+  (DSFormSextLoadi32 iaddrX4:$src))]>, isPPC64,
 PPC970_DGroup_Cracked;
 let Interpretation64Bit = 1, isCodeGenOnly = 1 in
 def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
@@ -1173,7 +1173,7 @@ def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, 
ptr_rc_nor0:$ea_result),
 let PPC970_Unit = 2 in {
 def LD   : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
 "ld $rD, $src", IIC_LdStLD,
-[(set i64:$rD, (aligned4load iaddrX4:$src))]>, isPPC64;
+[(set i64:$rD, (DSFormLoad iaddrX4:$src))]>, isPPC64;
 // The following four definitions are selected for small code model only.
 // Otherwise, we need to create two instructions to form a 32-bit offset,
 // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
@@ -1380,7 +1380,7 @@ def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, 
memrr:$dst),
 // Normal 8-byte stores.
 def STD  : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
 "std $rS, $dst", IIC_LdStSTD,
-[(aligned4store i64:$rS, iaddrX4:$dst)]>, isPPC64;
+[(DSFormStore i64:$rS, iaddrX4:$dst)]>, isPPC64;
 def STDX  : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
   "stdx $rS, $dst", IIC_LdStSTD,
   [(store i64:$rS, xaddrX4:$dst)]>, isPPC64,
@@ -1447,7 +1447,7 @@ def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, 
iaddroff:$ptroff),
   (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
 def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
   (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
-def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+def : Pat<(DSFormPreStore i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
   (STDU $rS, iaddroff:$ptroff, $ptrreg)>;
 
 def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
@@ -1591,11 +1591,11 @@ def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
 
 // Patterns to match r+r indexed loads and stores for
 // addresses without at least 4-byte alignment.
-def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+def : Pat<(i64 (NonDSFormSextLoadi32 xoaddr:$src)),
   (LWAX xoaddr:$src)>;
-def : Pat<(i64 (unaligned4load xoaddr:$src)),
+def : Pat<(i64 (NonDSFormLoad xoaddr:$src)),
   (LDX xoaddr:$src)>;
-def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
+def : Pat<(NonDSFormStore i64:$rS, xoaddr:$dst),
   (STDX $rS, xoaddr:$dst)>;
 
 // 64-bits atomic loads and stores

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td 
b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 849b96f507bd..018fb8ffe16c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -495,37 +495,41 @@ def imm64ZExt32  : Operand, ImmLeaf(Imm);
 }]>;
 
-// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// This is a somewhat weaker condition than actually checking for

[llvm-branch-commits] [llvm] fa42f08 - [PowerPC][FP128] Fix the incorrect calling convention for IEEE long double on Power8

2020-11-24 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-11-25T01:43:48Z
New Revision: fa42f08b2643d0a2e53fde8949e7f88b6d965bb8

URL: 
https://github.com/llvm/llvm-project/commit/fa42f08b2643d0a2e53fde8949e7f88b6d965bb8
DIFF: 
https://github.com/llvm/llvm-project/commit/fa42f08b2643d0a2e53fde8949e7f88b6d965bb8.diff

LOG: [PowerPC][FP128] Fix the incorrect calling convention for IEEE long double 
on Power8

For now, we are using GPRs to pass the arguments/return value for fp128 on
Power8, which is incorrect; VSRs should be used instead. The reason we do it
this way is that we are marking fp128 as illegal, which makes LLVM try to
emulate it with i128 on Power8. So, we need to correct this by marking the
type as legal.

Reviewed By: Nemanjai

Differential Revision: https://reviews.llvm.org/D91527

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCCallingConv.td
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrAltivec.td
llvm/test/CodeGen/PowerPC/f128-arith.ll

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td 
b/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 9a15490f1fb0..64de7353c516 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -59,7 +59,7 @@ def RetCC_PPC_Cold : CallingConv<[
 
   CCIfType<[f32], CCAssignToReg<[F1]>>,
   CCIfType<[f64], CCAssignToReg<[F1]>>,
-  CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2]>>>,
+  CCIfType<[f128], CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2]>>>,
 
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
@@ -92,7 +92,7 @@ def RetCC_PPC : CallingConv<[
 
   // For P9, f128 are passed in vector registers.
   CCIfType<[f128],
-   CCIfSubtarget<"hasP9Vector()",
+   CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
 
   // Vector types returned as "direct" go into V2 .. V9; note that only the
@@ -149,7 +149,7 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
   CCIfType<[f32],  CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
   CCIfType<[f64],  CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
   CCIfType<[f128],
-   CCIfSubtarget<"hasP9Vector()",
+   CCIfSubtarget<"hasAltivec()",
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
@@ -216,7 +216,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[
 
   // Vectors and float128 get 16-byte stack slots that are 16-byte aligned.
   CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 
16>>,
-  CCIfType<[f128], CCIfSubtarget<"hasP9Vector()", CCAssignToStack<16, 16>>>
+  CCIfType<[f128], CCIfSubtarget<"hasAltivec()", CCAssignToStack<16, 16>>>
 ]>;
 
 // This calling convention puts vector arguments always on the stack. It is 
used
@@ -238,7 +238,7 @@ def CC_PPC32_SVR4 : CallingConv<[
 
   // Float128 types treated as vector arguments.
   CCIfType<[f128],
-   CCIfSubtarget<"hasP9Vector()", CCAssignToReg<[V2, V3, V4, V5, V6, 
V7,
+   CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
   V8, V9, V10, V11, V12, V13]>>>,

   CCDelegateTo

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 5b42dbdb9bee..10aecf97fcdf 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -121,6 +121,11 @@ cl::desc("don't always align innermost loop to 32 bytes on 
ppc"), cl::Hidden);
 static cl::opt UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
 
+// TODO - Remove this option if soft fp128 has been fully supported .
+static cl::opt
+EnableSoftFP128("enable-soft-fp128",
+cl::desc("temp option to enable soft fp128"), cl::Hidden);
+
 STATISTIC(NumTailCalls, "Number of tail calls");
 STATISTIC(NumSiblingCalls, "Number of sibling calls");
 STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
@@ -1161,6 +1166,32 @@ PPCTargetLowering::PPCTargetLowering(const 
PPCTargetMachine &TM,
   setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
   setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
   setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
+} else if (Subtarget.hasAltivec() && EnableSoftFP128) {
+  addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
+
+  for (MVT FPT : MVT::fp_valuetypes())
+setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
+
+  setOperationAction(ISD::LOAD, MVT::f128, Promote);
+  setOperationAction(ISD::STORE, MVT::f128, Promote);
+
+  AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
+  AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
+
+  setOperationActi

[llvm-branch-commits] [llvm] 60c28a5 - [NFC][Test] Format the test for IEEE Long double

2020-11-24 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-11-25T03:00:24Z
New Revision: 60c28a5a2b76ebf9c8bac9ebf20ac8fe69c788ee

URL: 
https://github.com/llvm/llvm-project/commit/60c28a5a2b76ebf9c8bac9ebf20ac8fe69c788ee
DIFF: 
https://github.com/llvm/llvm-project/commit/60c28a5a2b76ebf9c8bac9ebf20ac8fe69c788ee.diff

LOG: [NFC][Test] Format the test for IEEE Long double

Added: 


Modified: 
llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
llvm/test/CodeGen/PowerPC/store_fptoi.ll

Removed: 




diff  --git a/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll 
b/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
index ffc626be2dea..26832efb3f4c 100644
--- a/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
@@ -1,65 +1,197 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mcpu=a2 < %s | FileCheck %s 
-check-prefix=FPCVT
 ; RUN: llc -verify-machineinstrs -mcpu=ppc64 < %s | FileCheck %s 
-check-prefix=PPC64
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s 
-check-prefix=PWR9
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
 ; Function Attrs: nounwind readnone
 define float @fool(float %X) #0 {
+; FPCVT-LABEL: fool:
+; FPCVT:   # %bb.0: # %entry
+; FPCVT-NEXT:friz 1, 1
+; FPCVT-NEXT:blr
+;
+; PPC64-LABEL: fool:
+; PPC64:   # %bb.0: # %entry
+; PPC64-NEXT:fctidz 0, 1
+; PPC64-NEXT:fcfid 0, 0
+; PPC64-NEXT:frsp 1, 0
+; PPC64-NEXT:blr
+;
+; PWR9-LABEL: fool:
+; PWR9:   # %bb.0: # %entry
+; PWR9-NEXT:xsrdpiz 1, 1
+; PWR9-NEXT:blr
 entry:
   %conv = fptosi float %X to i64
   %conv1 = sitofp i64 %conv to float
   ret float %conv1
 
-; FPCVT-LABEL: @fool
-; FPCVT: friz 1, 1
-; FPCVT: blr
 
-; PPC64-LABEL: @fool
-; PPC64: fctidz [[REG1:[0-9]+]], 1
-; PPC64: fcfid [[REG2:[0-9]+]], [[REG1]]
-; PPC64: frsp 1, [[REG2]]
-; PPC64: blr
 }
 
 ; Function Attrs: nounwind readnone
 define double @foodl(double %X) #0 {
+; FPCVT-LABEL: foodl:
+; FPCVT:   # %bb.0: # %entry
+; FPCVT-NEXT:friz 1, 1
+; FPCVT-NEXT:blr
+;
+; PPC64-LABEL: foodl:
+; PPC64:   # %bb.0: # %entry
+; PPC64-NEXT:fctidz 0, 1
+; PPC64-NEXT:fcfid 1, 0
+; PPC64-NEXT:blr
+;
+; PWR9-LABEL: foodl:
+; PWR9:   # %bb.0: # %entry
+; PWR9-NEXT:xsrdpiz 1, 1
+; PWR9-NEXT:blr
 entry:
   %conv = fptosi double %X to i64
   %conv1 = sitofp i64 %conv to double
   ret double %conv1
 
-; FPCVT-LABEL: @foodl
-; FPCVT: friz 1, 1
-; FPCVT: blr
 
-; PPC64-LABEL: @foodl
-; PPC64: fctidz [[REG1:[0-9]+]], 1
-; PPC64: fcfid 1, [[REG1]]
-; PPC64: blr
 }
 
 ; Function Attrs: nounwind readnone
 define float @fooul(float %X) #0 {
+; FPCVT-LABEL: fooul:
+; FPCVT:   # %bb.0: # %entry
+; FPCVT-NEXT:friz 1, 1
+; FPCVT-NEXT:blr
+;
+; PPC64-LABEL: fooul:
+; PPC64:   # %bb.0: # %entry
+; PPC64-NEXT:addis 3, 2, .LCPI2_0@toc@ha
+; PPC64-NEXT:li 4, 1
+; PPC64-NEXT:lfs 0, .LCPI2_0@toc@l(3)
+; PPC64-NEXT:sldi 4, 4, 63
+; PPC64-NEXT:fsubs 2, 1, 0
+; PPC64-NEXT:fcmpu 0, 1, 0
+; PPC64-NEXT:fctidz 2, 2
+; PPC64-NEXT:stfd 2, -8(1)
+; PPC64-NEXT:fctidz 2, 1
+; PPC64-NEXT:stfd 2, -16(1)
+; PPC64-NEXT:ld 3, -8(1)
+; PPC64-NEXT:ld 5, -16(1)
+; PPC64-NEXT:xor 3, 3, 4
+; PPC64-NEXT:bc 12, 0, .LBB2_1
+; PPC64-NEXT:b .LBB2_2
+; PPC64-NEXT:  .LBB2_1: # %entry
+; PPC64-NEXT:addi 3, 5, 0
+; PPC64-NEXT:  .LBB2_2: # %entry
+; PPC64-NEXT:sradi 4, 3, 53
+; PPC64-NEXT:clrldi 5, 3, 63
+; PPC64-NEXT:addi 4, 4, 1
+; PPC64-NEXT:cmpldi 4, 1
+; PPC64-NEXT:rldicl 4, 3, 63, 1
+; PPC64-NEXT:or 5, 5, 4
+; PPC64-NEXT:rldicl 6, 5, 11, 53
+; PPC64-NEXT:addi 6, 6, 1
+; PPC64-NEXT:clrldi 7, 5, 53
+; PPC64-NEXT:cmpldi 1, 6, 1
+; PPC64-NEXT:clrldi 6, 3, 53
+; PPC64-NEXT:addi 7, 7, 2047
+; PPC64-NEXT:addi 6, 6, 2047
+; PPC64-NEXT:or 4, 7, 4
+; PPC64-NEXT:or 6, 6, 3
+; PPC64-NEXT:rldicl 4, 4, 53, 11
+; PPC64-NEXT:rldicr 6, 6, 0, 52
+; PPC64-NEXT:bc 12, 1, .LBB2_4
+; PPC64-NEXT:  # %bb.3: # %entry
+; PPC64-NEXT:ori 6, 3, 0
+; PPC64-NEXT:b .LBB2_4
+; PPC64-NEXT:  .LBB2_4: # %entry
+; PPC64-NEXT:rldicl 4, 4, 11, 1
+; PPC64-NEXT:cmpdi 3, 0
+; PPC64-NEXT:std 6, -32(1)
+; PPC64-NEXT:bc 12, 5, .LBB2_6
+; PPC64-NEXT:  # %bb.5: # %entry
+; PPC64-NEXT:ori 4, 5, 0
+; PPC64-NEXT:b .LBB2_6
+; PPC64-NEXT:  .LBB2_6: # %entry
+; PPC64-NEXT:std 4, -24(1)
+; PPC64-NEXT:bc 12, 0, .LBB2_8
+; PPC64-NEXT:  # %bb.7: # %entry
+; PPC64-NEXT:lfd 0, -32(1)
+; PPC64-NEXT:fcfid 0, 0
+; PPC64-NEXT:frsp 1, 0
+; PPC64-NEXT:blr
+; PPC64-NEXT:  .LBB2_8:
+; PPC64-NEXT:lfd 0, -24(1)
+; PPC64-NEXT:fcfid 0, 0
+; PPC64-NEXT:frsp 0, 0
+; PPC64-NEXT:fadds 1, 0, 0
+; PPC64-NEXT:blr
+;
+; PWR9-LABEL: fooul:
+; PWR9:   # %bb.0: # %entry
+; PWR9-NEXT:

[llvm-branch-commits] [llvm] 9c588f5 - [DAGCombine] Add hook to allow target specific test for sqrt input

2020-11-24 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-11-25T05:37:15Z
New Revision: 9c588f53fc423dd0ed69250fbc93b37b40c0ef44

URL: 
https://github.com/llvm/llvm-project/commit/9c588f53fc423dd0ed69250fbc93b37b40c0ef44
DIFF: 
https://github.com/llvm/llvm-project/commit/9c588f53fc423dd0ed69250fbc93b37b40c0ef44.diff

LOG: [DAGCombine] Add hook to allow target specific test for sqrt input

PowerPC has instructions such as ftsqrt/xstsqrtdp to do the input test for software 
square root.
LLVM currently tests it against the smallest normalized value using abs + setcc. We 
should add a hook for
targets that have test instructions.

Reviewed By: Spatel, Chen Zheng, Qiu Chao Fang

Differential Revision: https://reviews.llvm.org/D80706

Added: 


Modified: 
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/lib/Target/PowerPC/PPCInstrFormats.td
llvm/lib/Target/PowerPC/PPCInstrInfo.td
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/fma-mutate.ll
llvm/test/CodeGen/PowerPC/recipest.ll

Removed: 




diff  --git a/llvm/include/llvm/CodeGen/TargetLowering.h 
b/llvm/include/llvm/CodeGen/TargetLowering.h
index 164cbd710713..16580a9160b9 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4277,6 +4277,15 @@ class TargetLowering : public TargetLoweringBase {
 return SDValue();
   }
 
+  /// Return a target-dependent comparison result if the input operand is
+  /// suitable for use with a square root estimate calculation. For example, 
the
+  /// comparison may check if the operand is NAN, INF, zero, normal, etc. The
+  /// result should be used as the condition operand for a select or branch.
+  virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
+   const DenormalMode &Mode) const {
+return SDValue();
+  }
+
   
//======//
   // Legalization utility functions
   //

diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cae602d166d1..4ac1743d2d34 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22056,26 +22056,31 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue 
Op, SDNodeFlags Flags,
 // possibly a denormal. Force the answer to 0.0 for those cases.
 SDLoc DL(Op);
 EVT CCVT = getSetCCResultType(VT);
-ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
+SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
 DenormalMode DenormMode = DAG.getDenormalMode(VT);
-if (DenormMode.Input == DenormalMode::IEEE) {
-  // This is specifically a check for the handling of denormal inputs,
-  // not the result.
-
-  // fabs(X) < SmallestNormal ? 0.0 : Est
-  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
-  APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
-  SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
-  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
-  SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
-  SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
-  Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
-} else {
-  // X == 0.0 ? 0.0 : Est
-  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
-  SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
-  Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
+// Try the target specific test first.
+SDValue Test = TLI.getSqrtInputTest(Op, DAG, DenormMode);
+if (!Test) {
+  // If no test provided by target, testing it with denormal inputs to
+  // avoid wrong estimate.
+  if (DenormMode.Input == DenormalMode::IEEE) {
+// This is specifically a check for the handling of denormal 
inputs,
+// not the result.
+
+// Test = fabs(X) < SmallestNormal
+const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
+SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
+SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
+Test = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
+  } else
+// Test = X == 0.0
+Test = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
 }
+// Test ? 0.0 : Est
+Est = DAG.getNode(Test.getValueType().isVector() ? ISD::VSELECT
+ : ISD::SELECT,
+ 

[llvm-branch-commits] [llvm] 4d83aba - [DAGCombine] Adding a hook to improve the precision of fsqrt if the input is denormal

2020-11-26 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-11-27T02:10:55Z
New Revision: 4d83aba4228ecb7dfefaf10a36a35f7299467819

URL: 
https://github.com/llvm/llvm-project/commit/4d83aba4228ecb7dfefaf10a36a35f7299467819
DIFF: 
https://github.com/llvm/llvm-project/commit/4d83aba4228ecb7dfefaf10a36a35f7299467819.diff

LOG: [DAGCombine] Adding a hook to improve the precision of fsqrt if the input 
is denormal

For now, we will hardcode the result as 0.0 if the input is denormal or 0. That will
impact the precision. As the added fsqrt belongs to the cold path of the
cmp+branch, it won't impact the performance for normal inputs on PowerPC, but 
will improve
the precision if the input is denormal.
the precision if the input is denormal.

Reviewed By: Spatel

Differential Revision: https://reviews.llvm.org/D80974

Added: 


Modified: 
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/lib/Target/PowerPC/PPCInstrInfo.td
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/fma-mutate.ll
llvm/test/CodeGen/PowerPC/recipest.ll

Removed: 




diff  --git a/llvm/include/llvm/CodeGen/TargetLowering.h 
b/llvm/include/llvm/CodeGen/TargetLowering.h
index 16580a9160b9..4aeefd980d7a 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4286,6 +4286,13 @@ class TargetLowering : public TargetLoweringBase {
 return SDValue();
   }
 
+  /// Return a target-dependent result if the input operand is not suitable for
+  /// use with a square root estimate calculation.
+  virtual SDValue getSqrtResultForDenormInput(SDValue Operand,
+  SelectionDAG &DAG) const {
+return DAG.getConstantFP(0.0, SDLoc(Operand), Operand.getValueType());
+  }
+
   
//======//
   // Legalization utility functions
   //

diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4ac1743d2d34..1b5debfe602e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22052,8 +22052,6 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, 
SDNodeFlags Flags,
 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
 
   if (!Reciprocal) {
-// The estimate is now completely wrong if the input was exactly 0.0 or
-// possibly a denormal. Force the answer to 0.0 for those cases.
 SDLoc DL(Op);
 EVT CCVT = getSetCCResultType(VT);
 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
@@ -22077,10 +22075,13 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue 
Op, SDNodeFlags Flags,
 // Test = X == 0.0
 Test = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
 }
-// Test ? 0.0 : Est
-Est = DAG.getNode(Test.getValueType().isVector() ? ISD::VSELECT
- : ISD::SELECT,
-  DL, VT, Test, FPZero, Est);
+
+// The estimate is now completely wrong if the input was exactly 0.0 or
+// possibly a denormal. Force the answer to 0.0 or value provided by
+// target for those cases.
+Est = DAG.getNode(
+Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, 
VT,
+Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
   }
 }
 return Est;

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cf369f5f12c1..2d8dfb63f19c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1441,6 +1441,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned 
Opcode) const {
   case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
   case PPCISD::FTSQRT:
 return "PPCISD::FTSQRT";
+  case PPCISD::FSQRT:
+return "PPCISD::FSQRT";
   case PPCISD::STFIWX:  return "PPCISD::STFIWX";
   case PPCISD::VPERM:   return "PPCISD::VPERM";
   case PPCISD::XXSPLT:  return "PPCISD::XXSPLT";
@@ -12761,6 +12763,17 @@ SDValue PPCTargetLowering::getSqrtInputTest(SDValue 
Op, SelectionDAG &DAG,
  0);
 }
 
+SDValue
+PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
+   SelectionDAG &DAG) const {
+  // TODO - add support for v2f64/v4f32
+  EVT VT = Op.getValueType();
+  if (VT != MVT::f64)
+return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
+
+  return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
+}
+
 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
int Enabled, int &RefinementSteps,

[llvm-branch-commits] [llvm] 47f784a - [PowerPC] Promote the i1 to i64 for SINT_TO_FP/FP_TO_SINT

2020-12-01 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-12-02T05:37:45Z
New Revision: 47f784ace6bb43eb9d95277fcc847fb82abf0f7a

URL: 
https://github.com/llvm/llvm-project/commit/47f784ace6bb43eb9d95277fcc847fb82abf0f7a
DIFF: 
https://github.com/llvm/llvm-project/commit/47f784ace6bb43eb9d95277fcc847fb82abf0f7a.diff

LOG: [PowerPC] Promote the i1 to i64 for SINT_TO_FP/FP_TO_SINT

i1 is the native type for PowerPC if crbits is enabled. However, we need
to promote the i1 to i64 as we don't have a pattern for i1.

Reviewed By: Qiu Chao Fang

Differential Revision: https://reviews.llvm.org/D92067

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/f128-conv.ll
llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2d8dfb63f19c..1864dc7f3113 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -234,6 +234,20 @@ PPCTargetLowering::PPCTargetLowering(const 
PPCTargetMachine &TM,
   setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
   AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
 isPPC64 ? MVT::i64 : MVT::i32);
+
+  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
+  AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
+isPPC64 ? MVT::i64 : MVT::i32);
+  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
+  AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
+isPPC64 ? MVT::i64 : MVT::i32);
+
+  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+  AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
+isPPC64 ? MVT::i64 : MVT::i32);
+  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+  AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
+isPPC64 ? MVT::i64 : MVT::i32);
 } else {
   setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
   setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);

diff  --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll 
b/llvm/test/CodeGen/PowerPC/f128-conv.ll
index 29cbe2dd4d3f..f8c6c97106ec 100644
--- a/llvm/test/CodeGen/PowerPC/f128-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll
@@ -181,6 +181,47 @@ entry:
 
 }
 
+; Function Attrs: norecurse nounwind
+define void @sdwConv2qp_04(fp128* nocapture %a, i1 %b) {
+; CHECK-LABEL: sdwConv2qp_04:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:andi. r4, r4, 1
+; CHECK-NEXT:li r4, 0
+; CHECK-NEXT:li r5, -1
+; CHECK-NEXT:iselgt r4, r5, r4
+; CHECK-NEXT:mtvsrwa v2, r4
+; CHECK-NEXT:xscvsdqp v2, v2
+; CHECK-NEXT:stxv v2, 0(r3)
+; CHECK-NEXT:blr
+;
+; CHECK-P8-LABEL: sdwConv2qp_04:
+; CHECK-P8:   # %bb.0: # %entry
+; CHECK-P8-NEXT:mflr r0
+; CHECK-P8-NEXT:.cfi_def_cfa_offset 48
+; CHECK-P8-NEXT:.cfi_offset lr, 16
+; CHECK-P8-NEXT:.cfi_offset r30, -16
+; CHECK-P8-NEXT:std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT:std r0, 16(r1)
+; CHECK-P8-NEXT:stdu r1, -48(r1)
+; CHECK-P8-NEXT:mr r30, r3
+; CHECK-P8-NEXT:clrldi r3, r4, 63
+; CHECK-P8-NEXT:neg r3, r3
+; CHECK-P8-NEXT:bl __floatsikf
+; CHECK-P8-NEXT:nop
+; CHECK-P8-NEXT:std r4, 8(r30)
+; CHECK-P8-NEXT:std r3, 0(r30)
+; CHECK-P8-NEXT:addi r1, r1, 48
+; CHECK-P8-NEXT:ld r0, 16(r1)
+; CHECK-P8-NEXT:ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-P8-NEXT:mtlr r0
+; CHECK-P8-NEXT:blr
+entry:
+  %conv = sitofp i1 %b to fp128
+  store fp128 %conv, fp128* %a, align 16
+  ret void
+
+}
+
 ; Function Attrs: norecurse nounwind
 define void @udwConv2qp(fp128* nocapture %a, i64 %b) {
 ; CHECK-LABEL: udwConv2qp:
@@ -349,6 +390,43 @@ entry:
 
 }
 
+; Function Attrs: norecurse nounwind
+define void @udwConv2qp_04(fp128* nocapture %a, i1 %b) {
+; CHECK-LABEL: udwConv2qp_04:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:clrlwi r4, r4, 31
+; CHECK-NEXT:mtvsrwa v2, r4
+; CHECK-NEXT:xscvsdqp v2, v2
+; CHECK-NEXT:stxv v2, 0(r3)
+; CHECK-NEXT:blr
+;
+; CHECK-P8-LABEL: udwConv2qp_04:
+; CHECK-P8:   # %bb.0: # %entry
+; CHECK-P8-NEXT:mflr r0
+; CHECK-P8-NEXT:.cfi_def_cfa_offset 48
+; CHECK-P8-NEXT:.cfi_offset lr, 16
+; CHECK-P8-NEXT:.cfi_offset r30, -16
+; CHECK-P8-NEXT:std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT:std r0, 16(r1)
+; CHECK-P8-NEXT:stdu r1, -48(r1)
+; CHECK-P8-NEXT:mr r30, r3
+; CHECK-P8-NEXT:clrldi r3, r4, 63
+; CHECK-P8-NEXT:bl __floatunsikf
+; CHECK-P8-NEXT:nop
+; CHECK-P8-NEXT:std r4, 8(r30)
+; CHECK-P8-NEXT:std r3, 0(r30)
+; CHECK-P8-NEXT:addi r1, r1, 48
+; CHECK-P8-NEXT:ld r0, 16(r1)
+; CHECK-P8-NEXT:ld r30, -16(r1) # 8-byte Folded Reload
+; C

[llvm-branch-commits] [llvm] 9bf0fea - [PowerPC] Add the hw sqrt test for vector type v4f32/v2f64

2020-12-02 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-12-03T03:19:18Z
New Revision: 9bf0fea3729e3ad63da24f94ce22c6b4628bec15

URL: 
https://github.com/llvm/llvm-project/commit/9bf0fea3729e3ad63da24f94ce22c6b4628bec15
DIFF: 
https://github.com/llvm/llvm-project/commit/9bf0fea3729e3ad63da24f94ce22c6b4628bec15.diff

LOG: [PowerPC] Add the hw sqrt test for vector type v4f32/v2f64

The PowerPC ISA supports the input test for the vector types v4f32 and v2f64.
Replacing the software compare with a hw test will improve the perf.

Reviewed By: ChenZheng

Differential Revision: https://reviews.llvm.org/D90914

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/recipest.ll

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f9f84aa668bc..101ef686c180 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12760,9 +12760,10 @@ static int getEstimateRefinementSteps(EVT VT, const 
PPCSubtarget &Subtarget) {
 
 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
 const DenormalMode &Mode) const {
-  // TODO - add support for v2f64/v4f32
+  // We only have VSX Vector Test for software Square Root.
   EVT VT = Op.getValueType();
-  if (VT != MVT::f64)
+  if (VT != MVT::f64 &&
+  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
 return SDValue();
 
   SDLoc DL(Op);
@@ -12788,9 +12789,10 @@ SDValue PPCTargetLowering::getSqrtInputTest(SDValue 
Op, SelectionDAG &DAG,
 SDValue
 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
SelectionDAG &DAG) const {
-  // TODO - add support for v2f64/v4f32
+  // We only have VSX Vector Square Root.
   EVT VT = Op.getValueType();
-  if (VT != MVT::f64)
+  if (VT != MVT::f64 &&
+  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
 
   return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td 
b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index e778ca4be6b5..35a0abcfd632 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -640,10 +640,12 @@ let hasSideEffects = 0 in {
 
   def XVTSQRTDP : XX2Form_1<60, 234,
   (outs crrc:$crD), (ins vsrc:$XB),
-  "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>;
+  "xvtsqrtdp $crD, $XB", IIC_FPCompare,
+  [(set i32:$crD, (PPCftsqrt v2f64:$XB))]>;
   def XVTSQRTSP : XX2Form_1<60, 170,
   (outs crrc:$crD), (ins vsrc:$XB),
-  "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>;
+  "xvtsqrtsp $crD, $XB", IIC_FPCompare,
+  [(set i32:$crD, (PPCftsqrt v4f32:$XB))]>;
   }
 
   def XVDIVDP : XX3Form<60, 120,
@@ -2464,6 +2466,8 @@ def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, (fneg v4f32:$C)),
   (XVNMADDASP $C, $A, $B)>;
 
 def : Pat<(PPCfsqrt f64:$frA), (XSSQRTDP $frA)>;
+def : Pat<(PPCfsqrt v2f64:$frA), (XVSQRTDP $frA)>;
+def : Pat<(PPCfsqrt v4f32:$frA), (XVSQRTSP $frA)>;
 
 def : Pat<(v2f64 (bitconvert v4f32:$A)),
   (COPY_TO_REGCLASS $A, VSRC)>;

diff  --git a/llvm/test/CodeGen/PowerPC/recipest.ll 
b/llvm/test/CodeGen/PowerPC/recipest.ll
index 3d9f2efc32e0..46da4cc6c471 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -953,24 +953,30 @@ define <4 x float> @hoo3_fmf(<4 x float> %a) #1 {
 ;
 ; CHECK-P8-LABEL: hoo3_fmf:
 ; CHECK-P8:   # %bb.0:
+; CHECK-P8-NEXT:xvtsqrtsp 0, 34
+; CHECK-P8-NEXT:bc 12, 2, .LBB24_2
+; CHECK-P8-NEXT:  # %bb.1:
 ; CHECK-P8-NEXT:xvrsqrtesp 0, 34
 ; CHECK-P8-NEXT:addis 3, 2, .LCPI24_0@toc@ha
 ; CHECK-P8-NEXT:addis 4, 2, .LCPI24_1@toc@ha
 ; CHECK-P8-NEXT:addi 3, 3, .LCPI24_0@toc@l
-; CHECK-P8-NEXT:lvx 3, 0, 3
-; CHECK-P8-NEXT:addi 3, 4, .LCPI24_1@toc@l
-; CHECK-P8-NEXT:lvx 4, 0, 3
 ; CHECK-P8-NEXT:xvmulsp 1, 34, 0
-; CHECK-P8-NEXT:xvmaddasp 35, 1, 0
-; CHECK-P8-NEXT:xvmulsp 0, 1, 36
-; CHECK-P8-NEXT:xxlxor 1, 1, 1
-; CHECK-P8-NEXT:xvcmpeqsp 2, 34, 1
-; CHECK-P8-NEXT:xvmulsp 0, 0, 35
-; CHECK-P8-NEXT:xxsel 34, 0, 1, 2
+; CHECK-P8-NEXT:lvx 2, 0, 3
+; CHECK-P8-NEXT:addi 3, 4, .LCPI24_1@toc@l
+; CHECK-P8-NEXT:lvx 3, 0, 3
+; CHECK-P8-NEXT:xvmaddasp 34, 1, 0
+; CHECK-P8-NEXT:xvmulsp 0, 1, 35
+; CHECK-P8-NEXT:xvmulsp 34, 0, 34
+; CHECK-P8-NEXT:blr
+; CHECK-P8-NEXT:  .LBB24_2:
+; CHECK-P8-NEXT:xvsqrtsp 34, 34
 ; CHECK-P8-NEXT:blr
 ;
 ; CHECK-P9-LABEL: hoo3_fmf:
 ; CHECK-P9:   # %bb.0:
+; CHECK-P9-NEXT:xvtsqrtsp 0, 34
+; C

[llvm-branch-commits] [llvm] c25b039 - [PowerPC] Fix the regression caused by commit 9c588f53fc42

2020-12-04 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-12-04T10:22:13Z
New Revision: c25b039e211441033069c7046324d2f76de37bed

URL: 
https://github.com/llvm/llvm-project/commit/c25b039e211441033069c7046324d2f76de37bed
DIFF: 
https://github.com/llvm/llvm-project/commit/c25b039e211441033069c7046324d2f76de37bed.diff

LOG: [PowerPC] Fix the regression caused by commit 9c588f53fc42

Add a TypeLegal check for MVT::i1 and add the test.

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/recipest.ll

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 101ef686c1805..c5dbacde6fa5b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12762,8 +12762,9 @@ SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, 
SelectionDAG &DAG,
 const DenormalMode &Mode) const {
   // We only have VSX Vector Test for software Square Root.
   EVT VT = Op.getValueType();
-  if (VT != MVT::f64 &&
-  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
+  if (!isTypeLegal(MVT::i1) ||
+  (VT != MVT::f64 &&
+   ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX(
 return SDValue();
 
   SDLoc DL(Op);

diff  --git a/llvm/test/CodeGen/PowerPC/recipest.ll 
b/llvm/test/CodeGen/PowerPC/recipest.ll
index 46da4cc6c4719..aad0bb47772ea 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -821,6 +821,90 @@ define double @foo3_fmf(double %a) nounwind {
   ret double %r
 }
 
+define double @foo3_fmf_crbits_off(double %a) #2 {
+; CHECK-P7-LABEL: foo3_fmf_crbits_off:
+; CHECK-P7:   # %bb.0:
+; CHECK-P7-NEXT:fabs 0, 1
+; CHECK-P7-NEXT:addis 3, 2, .LCPI21_2@toc@ha
+; CHECK-P7-NEXT:lfd 2, .LCPI21_2@toc@l(3)
+; CHECK-P7-NEXT:fcmpu 0, 0, 2
+; CHECK-P7-NEXT:blt 0, .LBB21_2
+; CHECK-P7-NEXT:  # %bb.1:
+; CHECK-P7-NEXT:frsqrte 0, 1
+; CHECK-P7-NEXT:addis 3, 2, .LCPI21_0@toc@ha
+; CHECK-P7-NEXT:addis 4, 2, .LCPI21_1@toc@ha
+; CHECK-P7-NEXT:lfs 3, .LCPI21_0@toc@l(3)
+; CHECK-P7-NEXT:lfs 4, .LCPI21_1@toc@l(4)
+; CHECK-P7-NEXT:fmul 2, 1, 0
+; CHECK-P7-NEXT:fmadd 2, 2, 0, 3
+; CHECK-P7-NEXT:fmul 0, 0, 4
+; CHECK-P7-NEXT:fmul 0, 0, 2
+; CHECK-P7-NEXT:fmul 1, 1, 0
+; CHECK-P7-NEXT:fmadd 0, 1, 0, 3
+; CHECK-P7-NEXT:fmul 1, 1, 4
+; CHECK-P7-NEXT:fmul 1, 1, 0
+; CHECK-P7-NEXT:blr
+; CHECK-P7-NEXT:  .LBB21_2:
+; CHECK-P7-NEXT:fsqrt 1, 1
+; CHECK-P7-NEXT:blr
+;
+; CHECK-P8-LABEL: foo3_fmf_crbits_off:
+; CHECK-P8:   # %bb.0:
+; CHECK-P8-NEXT:xsabsdp 0, 1
+; CHECK-P8-NEXT:addis 3, 2, .LCPI21_2@toc@ha
+; CHECK-P8-NEXT:lfd 2, .LCPI21_2@toc@l(3)
+; CHECK-P8-NEXT:xscmpudp 0, 0, 2
+; CHECK-P8-NEXT:blt 0, .LBB21_2
+; CHECK-P8-NEXT:  # %bb.1:
+; CHECK-P8-NEXT:xsrsqrtedp 0, 1
+; CHECK-P8-NEXT:addis 3, 2, .LCPI21_0@toc@ha
+; CHECK-P8-NEXT:lfs 3, .LCPI21_0@toc@l(3)
+; CHECK-P8-NEXT:addis 3, 2, .LCPI21_1@toc@ha
+; CHECK-P8-NEXT:lfs 4, .LCPI21_1@toc@l(3)
+; CHECK-P8-NEXT:fmr 5, 3
+; CHECK-P8-NEXT:xsmuldp 2, 1, 0
+; CHECK-P8-NEXT:xsmaddadp 5, 2, 0
+; CHECK-P8-NEXT:xsmuldp 0, 0, 4
+; CHECK-P8-NEXT:xsmuldp 0, 0, 5
+; CHECK-P8-NEXT:xsmuldp 1, 1, 0
+; CHECK-P8-NEXT:xsmaddadp 3, 1, 0
+; CHECK-P8-NEXT:xsmuldp 0, 1, 4
+; CHECK-P8-NEXT:xsmuldp 1, 0, 3
+; CHECK-P8-NEXT:blr
+; CHECK-P8-NEXT:  .LBB21_2:
+; CHECK-P8-NEXT:xssqrtdp 1, 1
+; CHECK-P8-NEXT:blr
+;
+; CHECK-P9-LABEL: foo3_fmf_crbits_off:
+; CHECK-P9:   # %bb.0:
+; CHECK-P9-NEXT:addis 3, 2, .LCPI21_2@toc@ha
+; CHECK-P9-NEXT:xsabsdp 0, 1
+; CHECK-P9-NEXT:lfd 2, .LCPI21_2@toc@l(3)
+; CHECK-P9-NEXT:xscmpudp 0, 0, 2
+; CHECK-P9-NEXT:blt 0, .LBB21_2
+; CHECK-P9-NEXT:  # %bb.1:
+; CHECK-P9-NEXT:xsrsqrtedp 0, 1
+; CHECK-P9-NEXT:addis 3, 2, .LCPI21_0@toc@ha
+; CHECK-P9-NEXT:lfs 3, .LCPI21_0@toc@l(3)
+; CHECK-P9-NEXT:addis 3, 2, .LCPI21_1@toc@ha
+; CHECK-P9-NEXT:xsmuldp 2, 1, 0
+; CHECK-P9-NEXT:fmr 4, 3
+; CHECK-P9-NEXT:xsmaddadp 4, 2, 0
+; CHECK-P9-NEXT:lfs 2, .LCPI21_1@toc@l(3)
+; CHECK-P9-NEXT:xsmuldp 0, 0, 2
+; CHECK-P9-NEXT:xsmuldp 0, 0, 4
+; CHECK-P9-NEXT:xsmuldp 1, 1, 0
+; CHECK-P9-NEXT:xsmaddadp 3, 1, 0
+; CHECK-P9-NEXT:xsmuldp 0, 1, 2
+; CHECK-P9-NEXT:xsmuldp 1, 0, 3
+; CHECK-P9-NEXT:blr
+; CHECK-P9-NEXT:  .LBB21_2:
+; CHECK-P9-NEXT:xssqrtdp 1, 1
+; CHECK-P9-NEXT:blr
+  %r = call reassoc ninf afn double @llvm.sqrt.f64(double %a)
+  ret double %r
+}
+
 define double @foo3_safe(double %a) nounwind {
 ; CHECK-P7-LABEL: foo3_safe:
 ; CHECK-P7:   # %bb.0:
@@ -844,67 +928,67 @@ define float @goo3_fmf(float %a) nounwind {
 ; CHECK-P7-LABEL: goo3_fmf:
 ; CHECK-P7:   # %bb.0:
 ; CHECK-P7-NEXT:fabs 0,

[llvm-branch-commits] [llvm] 08280c4 - [NFC][Test] Format the PowerPC test for incoming patch

2020-12-11 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-12-11T09:53:20Z
New Revision: 08280c4b73439e5f99000c89a818f66343e87aa6

URL: 
https://github.com/llvm/llvm-project/commit/08280c4b73439e5f99000c89a818f66343e87aa6
DIFF: 
https://github.com/llvm/llvm-project/commit/08280c4b73439e5f99000c89a818f66343e87aa6.diff

LOG: [NFC][Test] Format the PowerPC test for incoming patch

Added: 


Modified: 
llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll
llvm/test/CodeGen/PowerPC/unal4-std.ll
llvm/test/CodeGen/PowerPC/unaligned.ll

Removed: 




diff  --git a/llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll 
b/llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll
index b672eef8740af..d6ed3dcf41b0a 100644
--- a/llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll
+++ b/llvm/test/CodeGen/PowerPC/p9-dform-load-alignment.ll
@@ -1,16 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
 ; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
 
 @best8x8mode = external dso_local local_unnamed_addr global [4 x i16], align 2
 define dso_local void @AlignDSForm() local_unnamed_addr {
+; CHECK-LABEL: AlignDSForm:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis r3, r2, best8x8mode@toc@ha
+; CHECK-NEXT:addi r3, r3, best8x8mode@toc@l
+; CHECK-NEXT:ldx r3, 0, r3
+; CHECK-NEXT:std r3, 0(r3)
 entry:
   %0 = load <4 x i16>, <4 x i16>* bitcast ([4 x i16]* @best8x8mode to <4 x 
i16>*), align 2
   store <4 x i16> %0, <4 x i16>* undef, align 4
   unreachable
-; CHECK-LABEL: AlignDSForm
-; CHECK: addis r{{[0-9]+}}, r{{[0-9]+}}, best8x8mode@toc@ha
-; CHECK: addi r[[REG:[0-9]+]], r{{[0-9]+}}, best8x8mode@toc@l
-; CHECK: ldx r{{[0-9]+}}, 0, r[[REG]]
 }
 

diff  --git a/llvm/test/CodeGen/PowerPC/unal4-std.ll 
b/llvm/test/CodeGen/PowerPC/unal4-std.ll
index f843b6b58c1ee..038ede0ba92ee 100644
--- a/llvm/test/CodeGen/PowerPC/unal4-std.ll
+++ b/llvm/test/CodeGen/PowerPC/unal4-std.ll
@@ -1,9 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=-vsx| FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr7 -mattr=+vsx | FileCheck 
-check-prefix=CHECK-VSX %s
 target datalayout = 
"E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
 define void @copy_to_conceal(<8 x i16>* %inp) #0 {
+; CHECK-LABEL: copy_to_conceal:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vxor 2, 2, 2
+; CHECK-NEXT:addi 4, 1, -16
+; CHECK-NEXT:stvx 2, 0, 4
+; CHECK-NEXT:ld 4, -8(1)
+; CHECK-NEXT:std 4, 8(3)
+; CHECK-NEXT:ld 4, -16(1)
+; CHECK-NEXT:stdx 4, 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-VSX-LABEL: copy_to_conceal:
+; CHECK-VSX:   # %bb.0: # %entry
+; CHECK-VSX-NEXT:xxlxor 0, 0, 0
+; CHECK-VSX-NEXT:stxvw4x 0, 0, 3
+; CHECK-VSX-NEXT:blr
 entry:
   store <8 x i16> zeroinitializer, <8 x i16>* %inp, align 2
   br label %if.end210
@@ -14,11 +31,7 @@ if.end210:                                        ; preds = %entry
 ; This will generate two align-1 i64 stores. Make sure that they are
 ; indexed stores and not in r+i form (which require the offset to be
 ; a multiple of 4).
-; CHECK: @copy_to_conceal
-; CHECK: stdx {{[0-9]+}}, 0,
 
-; CHECK-VSX: @copy_to_conceal
-; CHECK-VSX: stxvw4x {{[0-9]+}}, 0,
 }
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" 
"frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"unsafe-fp-math"="false" "use-soft-float"="false" }

diff --git a/llvm/test/CodeGen/PowerPC/unaligned.ll 
b/llvm/test/CodeGen/PowerPC/unaligned.ll
index bd518342f3ec9..977c470e668e2 100644
--- a/llvm/test/CodeGen/PowerPC/unaligned.ll
+++ b/llvm/test/CodeGen/PowerPC/unaligned.ll
@@ -1,105 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu 
-mcpu=pwr7 -mattr=-vsx | FileCheck %s
 target datalayout = 
"E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu 
-mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
 target datalayout = 
"E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
 
 define void @foo1(i16* %p, i16* %r) nounwind {
+; CHECK-LABEL: foo1:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lhz 3, 0(3)
+; CHECK-NEXT:sth 3, 0(4)
+; CHECK-NEXT:blr
+;
+; CHECK-VSX-LABEL: foo1:
+; CHECK-VSX:   # %bb.0: # %entry
+; CHECK-VSX-NEXT:lhz 3, 0(3)
+; CHECK-VSX-NEXT:sth 3, 0(4)
+; CHECK-VSX

[llvm-branch-commits] [llvm] 68dbb77 - [NFC][Test] Add a test to verify the instruction form we got from isel

2020-12-11 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-12-11T10:36:46Z
New Revision: 68dbb7789e5388657420afcbdd0f928e4fdfcfb8

URL: 
https://github.com/llvm/llvm-project/commit/68dbb7789e5388657420afcbdd0f928e4fdfcfb8
DIFF: 
https://github.com/llvm/llvm-project/commit/68dbb7789e5388657420afcbdd0f928e4fdfcfb8.diff

LOG: [NFC][Test] Add a test to verify the instruction form we got from isel

Added: 
llvm/test/CodeGen/PowerPC/ldst-align.ll

Modified: 


Removed: 




diff --git a/llvm/test/CodeGen/PowerPC/ldst-align.ll 
b/llvm/test/CodeGen/PowerPC/ldst-align.ll
new file mode 100644
index ..129f28191ec7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ldst-align.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s \
+; RUN:   -stop-after=finalize-isel -verify-machineinstrs | FileCheck %s
+define i64 @load(i64* %p) {
+  ; CHECK-LABEL: name: load
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $x3
+  ; CHECK:   [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
+  ; CHECK:   [[ADDI8_:%[0-9]+]]:g8rc = nuw ADDI8 [[COPY]], 24
+  ; CHECK:   [[LDX:%[0-9]+]]:g8rc = LDX $zero8, killed [[ADDI8_]] :: (load 8 
from %ir.arrayidx, align 2)
+  ; CHECK:   $x3 = COPY [[LDX]]
+  ; CHECK:   BLR8 implicit $lr8, implicit $rm, implicit $x3
+entry:
+  %arrayidx = getelementptr inbounds i64, i64* %p, i64 3
+  %0 = load i64, i64* %arrayidx, align 2
+  ret i64 %0
+}
+
+define void @store(i64* %p) {
+  ; CHECK-LABEL: name: store
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $x3
+  ; CHECK:   [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
+  ; CHECK:   [[ADDI8_:%[0-9]+]]:g8rc = nuw ADDI8 [[COPY]], 16
+  ; CHECK:   [[LI8_:%[0-9]+]]:g8rc = LI8 9
+  ; CHECK:   STDX killed [[LI8_]], $zero8, killed [[ADDI8_]] :: (store 8 into 
%ir.arrayidx, align 1)
+  ; CHECK:   BLR8 implicit $lr8, implicit $rm
+entry:
+  %arrayidx = getelementptr inbounds i64, i64* %p, i64 2
+  store i64 9, i64* %arrayidx, align 1
+  ret void
+}
+
+define void @store_aligned(i64* %p) {
+  ; CHECK-LABEL: name: store_aligned
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $x3
+  ; CHECK:   [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
+  ; CHECK:   [[LI8_:%[0-9]+]]:g8rc = LI8 9
+  ; CHECK:   STD killed [[LI8_]], 16, [[COPY]] :: (store 8 into %ir.arrayidx, 
align 4)
+  ; CHECK:   BLR8 implicit $lr8, implicit $rm
+entry:
+  %arrayidx = getelementptr inbounds i64, i64* %p, i64 2
+  store i64 9, i64* %arrayidx, align 4
+  ret void
+}



_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 08e287a - [PowerPC][FP128] Fix the incorrect signature for math library call

2020-12-13 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-12-14T07:52:56Z
New Revision: 08e287aaf39f3ab8ccfcd4535fafa1c5d99ffdf7

URL: 
https://github.com/llvm/llvm-project/commit/08e287aaf39f3ab8ccfcd4535fafa1c5d99ffdf7
DIFF: 
https://github.com/llvm/llvm-project/commit/08e287aaf39f3ab8ccfcd4535fafa1c5d99ffdf7.diff

LOG: [PowerPC][FP128] Fix the incorrect signature for math library call

The runtime library has two families of library implementations, for ppc_fp128 and 
fp128.
For IBM long double (ppc_fp128), the function name is suffixed with 'l', i.e. (sqrtl). For
IEEE long double (fp128), it is suffixed with "ieee128" or "f128".
We missed mapping several libcalls for IEEE long double.

Reviewed By: qiucf

Differential Revision: https://reviews.llvm.org/D91675

Added: 


Modified: 
llvm/lib/CodeGen/TargetLoweringBase.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/f128-arith.ll
llvm/test/CodeGen/PowerPC/f128-conv.ll
llvm/test/CodeGen/PowerPC/f128-rounding.ll
llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
llvm/test/CodeGen/PowerPC/recipest.ll

Removed: 




diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp 
b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 58543b48a994..553434cdd5fa 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -140,18 +140,23 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
 setLibcallName(RTLIB::SUB_F128, "__subkf3");
 setLibcallName(RTLIB::MUL_F128, "__mulkf3");
 setLibcallName(RTLIB::DIV_F128, "__divkf3");
+setLibcallName(RTLIB::POWI_F128, "__powikf2");
 setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
 setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
 setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
 setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
 setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
 setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
+setLibcallName(RTLIB::FPTOSINT_F128_I128, "__fixkfti");
 setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
 setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
+setLibcallName(RTLIB::FPTOUINT_F128_I128, "__fixunskfti");
 setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
 setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
+setLibcallName(RTLIB::SINTTOFP_I128_F128, "__floattikf");
 setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
 setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
+setLibcallName(RTLIB::UINTTOFP_I128_F128, "__floatuntikf");
 setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
 setLibcallName(RTLIB::UNE_F128, "__nekf2");
 setLibcallName(RTLIB::OGE_F128, "__gekf2");

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 10cf7d7f5e02..a98d99af552c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1308,8 +1308,19 @@ PPCTargetLowering::PPCTargetLowering(const 
PPCTargetMachine &TM,
   setLibcallName(RTLIB::POW_F128, "powf128");
   setLibcallName(RTLIB::FMIN_F128, "fminf128");
   setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
-  setLibcallName(RTLIB::POWI_F128, "__powikf2");
   setLibcallName(RTLIB::REM_F128, "fmodf128");
+  setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
+  setLibcallName(RTLIB::CEIL_F128, "ceilf128");
+  setLibcallName(RTLIB::FLOOR_F128, "floorf128");
+  setLibcallName(RTLIB::TRUNC_F128, "truncf128");
+  setLibcallName(RTLIB::ROUND_F128, "roundf128");
+  setLibcallName(RTLIB::LROUND_F128, "lroundf128");
+  setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
+  setLibcallName(RTLIB::RINT_F128, "rintf128");
+  setLibcallName(RTLIB::LRINT_F128, "lrintf128");
+  setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
+  setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
+  setLibcallName(RTLIB::FMA_F128, "fmaf128");
 
   // With 32 condition bits, we don't need to sink (and duplicate) compares
   // aggressively in CodeGenPrep.

diff --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll 
b/llvm/test/CodeGen/PowerPC/f128-arith.ll
index 61bd03aa6368..587cf32a70e6 100644
--- a/llvm/test/CodeGen/PowerPC/f128-arith.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll
@@ -195,7 +195,7 @@ define void @qpSqrt(fp128* nocapture readonly %a, fp128* 
nocapture %res) {
 ; CHECK-P8-NEXT:stdu r1, -48(r1)
 ; CHECK-P8-NEXT:lvx v2, 0, r3
 ; CHECK-P8-NEXT:mr r30, r4
-; CHECK-P8-NEXT:bl sqrtl
+; CHECK-P8-NEXT:bl sqrtf128
 ; CHECK-P8-NEXT:nop
 ; CHECK-P8-NEXT:stvx v2, 0, r30
 ; CHECK-P8-NEXT:addi r1, r1, 48
@@ -840,7 +840,7 @@ define void @qpCeil(fp128* nocapture readonly %a, fp128* 
nocapture %res) {
 ; CHECK-P8-NEXT:stdu r1, -48(r1)
 ; CHECK-P8-NEXT:lvx v2, 0, r3
 ; CHECK-P8-NEXT:mr r30, r4
-; CHECK-P8-NEXT:bl c