[llvm-branch-commits] [llvm] a772082 - PowerPC: Treat llvm.fma.f* intrinsic as using CTR with SPE

Author: Justin Hibbits
Date: 2020-05-12T16:53:23-05:00
New Revision: a772082fc1c7aa043175bb34a9363d3c119a7930

URL: 
https://github.com/llvm/llvm-project/commit/a772082fc1c7aa043175bb34a9363d3c119a7930
DIFF: 
https://github.com/llvm/llvm-project/commit/a772082fc1c7aa043175bb34a9363d3c119a7930.diff

LOG: PowerPC: Treat llvm.fma.f* intrinsic as using CTR with SPE

Summary:
SPE doesn't have an 'fma' instruction, so the intrinsic becomes a
libcall.  (It really should expand to two instructions, but for some
reason the compiler doesn't consider that as cheap as a branch.)  Since
this lowering is done after the CTR is allocated for loops, tell the
optimizer that the CTR may be used in this case.  This prevents an
"Invalid PPC CTR loop!" assertion when an fma() call in a C/C++ file is
converted by clang into the intrinsic.
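
For illustration, a minimal C-level reproducer (a hypothetical sketch
mirroring the test added below, not part of the commit) is a loop whose
body calls fmaf():

    // Hypothetical reproducer, compiled for a powerpcspe target with
    // -mattr=+spe: clang turns fmaf() into llvm.fma.f32, which SPE can
    // only lower as a libcall, so the loop must not use the CTR.
    float test_fma(int d) {
      float e = 0.0f;
      for (int f = 0; f < d; ++f)
        e = __builtin_fmaf((float)f, (float)f, e); // becomes llvm.fma.f32
      return e;
    }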

Reviewed By: shchenz
Differential Revision: https://reviews.llvm.org/D78668

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
llvm/test/CodeGen/PowerPC/spe.ll

Removed: 




diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 698e2d48b53a..68a1d4321dd6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -319,6 +319,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
   return true;
 else
   continue; // ISD::FCOPYSIGN is never a library call.
+  case Intrinsic::fma:    Opcode = ISD::FMA;    break;
   case Intrinsic::sqrt:   Opcode = ISD::FSQRT;  break;
   case Intrinsic::floor:  Opcode = ISD::FFLOOR; break;
   case Intrinsic::ceil:   Opcode = ISD::FCEIL;  break;

diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll
index 8a39566d0870..10a9571a9877 100644
--- a/llvm/test/CodeGen/PowerPC/spe.ll
+++ b/llvm/test/CodeGen/PowerPC/spe.ll
@@ -1355,3 +1355,70 @@ return:
   ret double %1
 
 }
+
+define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fma:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mflr 0
+; CHECK-NEXT:stw 0, 4(1)
+; CHECK-NEXT:stwu 1, -48(1)
+; CHECK-NEXT:.cfi_def_cfa_offset 48
+; CHECK-NEXT:.cfi_offset lr, 4
+; CHECK-NEXT:.cfi_offset r29, -12
+; CHECK-NEXT:.cfi_offset r30, -8
+; CHECK-NEXT:.cfi_offset r29, -40
+; CHECK-NEXT:.cfi_offset r30, -32
+; CHECK-NEXT:cmpwi 3, 1
+; CHECK-NEXT:stw 29, 36(1) # 4-byte Folded Spill
+; CHECK-NEXT:stw 30, 40(1) # 4-byte Folded Spill
+; CHECK-NEXT:evstdd 29, 8(1) # 8-byte Folded Spill
+; CHECK-NEXT:evstdd 30, 16(1) # 8-byte Folded Spill
+; CHECK-NEXT:blt 0, .LBB57_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:mr 30, 3
+; CHECK-NEXT:li 29, 0
+; CHECK-NEXT:# implicit-def: $r5
+; CHECK-NEXT:  .LBB57_2: # %for.body
+; CHECK-NEXT:# =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:efscfsi 3, 29
+; CHECK-NEXT:mr 4, 3
+; CHECK-NEXT:bl fmaf
+; CHECK-NEXT:addi 29, 29, 1
+; CHECK-NEXT:cmplw 30, 29
+; CHECK-NEXT:mr 5, 3
+; CHECK-NEXT:bne 0, .LBB57_2
+; CHECK-NEXT:b .LBB57_4
+; CHECK-NEXT:  .LBB57_3:
+; CHECK-NEXT:# implicit-def: $r5
+; CHECK-NEXT:  .LBB57_4: # %for.cond.cleanup
+; CHECK-NEXT:evldd 30, 16(1) # 8-byte Folded Reload
+; CHECK-NEXT:evldd 29, 8(1) # 8-byte Folded Reload
+; CHECK-NEXT:mr 3, 5
+; CHECK-NEXT:lwz 30, 40(1) # 4-byte Folded Reload
+; CHECK-NEXT:lwz 29, 36(1) # 4-byte Folded Reload
+; CHECK-NEXT:lwz 0, 52(1)
+; CHECK-NEXT:addi 1, 1, 48
+; CHECK-NEXT:mtlr 0
+; CHECK-NEXT:blr
+entry:
+  %cmp8 = icmp sgt i32 %d, 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+  %e.0.lcssa = phi float [ undef, %entry ], [ %0, %for.body ]
+  ret float %e.0.lcssa
+
+for.body: ; preds = %for.body, %entry
+  %f.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %e.09 = phi float [ %0, %for.body ], [ undef, %entry ]
+  %conv = sitofp i32 %f.010 to float
+  %0 = tail call float @llvm.fma.f32(float %conv, float %conv, float %e.09)
+  %inc = add nuw nsw i32 %f.010, 1
+  %exitcond = icmp eq i32 %inc, %d
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare float @llvm.fma.f32(float, float, float) #1
+
+attributes #1 = { nounwind readnone speculatable willreturn }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] e82f0d9 - PowerPC: Add emergency stack spill slots for SPE

Author: Justin Hibbits
Date: 2020-05-12T16:53:30-05:00
New Revision: e82f0d991fc01a8e509f4d6c2f5139af2d29f913

URL: 
https://github.com/llvm/llvm-project/commit/e82f0d991fc01a8e509f4d6c2f5139af2d29f913
DIFF: 
https://github.com/llvm/llvm-project/commit/e82f0d991fc01a8e509f4d6c2f5139af2d29f913.diff

LOG: PowerPC: Add emergency stack spill slots for SPE

The powerpcspe 64-bit load/store instructions only allow an 8-bit offset
(32 64-bit words), so if the stack frame is any larger than that, we
need extra spill slots to do the indexing.
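
As a sketch of the new condition (assuming LLVM's isInt<8>, which tests
whether a value fits in a signed 8-bit integer):

    // Sketch only: an SPE frame that both spills and is larger than the
    // 8-bit offset window needs an emergency slot so the register
    // scavenger can materialize out-of-range frame offsets.
    static bool needsSPEEmergencySlot(unsigned StackSize, bool HasSpills) {
      const bool FitsInt8 = StackSize <= 127; // isInt<8> for non-negative sizes
      return HasSpills && !FitsInt8;
    }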

Added: 
llvm/test/CodeGen/PowerPC/spe-spills.ll

Modified: 
llvm/lib/Target/PowerPC/PPCFrameLowering.cpp

Removed: 




diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 7fbdf97793f7..bef9bcbdfaeb 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -2062,7 +2062,8 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
   unsigned StackSize = determineFrameLayout(MF, true);
   MachineFrameInfo &MFI = MF.getFrameInfo();
   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
-  hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
+  hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize)) ||
+  (Subtarget.hasSPE() && hasSpills(MF) && !isInt<8>(StackSize))) {
 const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
 const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;

diff --git a/llvm/test/CodeGen/PowerPC/spe-spills.ll b/llvm/test/CodeGen/PowerPC/spe-spills.ll
new file mode 100644
index ..8fb0f2278fff
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/spe-spills.ll
@@ -0,0 +1,721 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \
+; RUN:  -mattr=+spe |  FileCheck %s
+; Tests that spill slots are allocated for stacks larger than 256 bytes on
+; powerpcspe targets
+
+@d = local_unnamed_addr global double* null, align 4
+@c = local_unnamed_addr global i32 0, align 4
+@g = local_unnamed_addr global double 0.00e+00, align 8
+@e = local_unnamed_addr global double* null, align 4
+@h = local_unnamed_addr global double 0.00e+00, align 8
+@j = local_unnamed_addr global double 0.00e+00, align 8
+@f = local_unnamed_addr global double 0.00e+00, align 8
+@a = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: nofree norecurse nounwind sspstrong uwtable
+define i32 @k(double* nocapture readonly %l, double* nocapture %m, i32 %aa, i32 %ab, i32 %n, i32 %ac, i32 %ad) local_unnamed_addr #0 {
+; CHECK-LABEL: k:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:stwu 1, -480(1)
+; CHECK-NEXT:.cfi_def_cfa_offset 480
+; CHECK-NEXT:.cfi_offset r14, -72
+; CHECK-NEXT:.cfi_offset r15, -68
+; CHECK-NEXT:.cfi_offset r16, -64
+; CHECK-NEXT:.cfi_offset r17, -60
+; CHECK-NEXT:.cfi_offset r18, -56
+; CHECK-NEXT:.cfi_offset r19, -52
+; CHECK-NEXT:.cfi_offset r20, -48
+; CHECK-NEXT:.cfi_offset r21, -44
+; CHECK-NEXT:.cfi_offset r22, -40
+; CHECK-NEXT:.cfi_offset r23, -36
+; CHECK-NEXT:.cfi_offset r24, -32
+; CHECK-NEXT:.cfi_offset r25, -28
+; CHECK-NEXT:.cfi_offset r26, -24
+; CHECK-NEXT:.cfi_offset r27, -20
+; CHECK-NEXT:.cfi_offset r28, -16
+; CHECK-NEXT:.cfi_offset r29, -12
+; CHECK-NEXT:.cfi_offset r30, -8
+; CHECK-NEXT:.cfi_offset r31, -4
+; CHECK-NEXT:.cfi_offset r14, -224
+; CHECK-NEXT:.cfi_offset r15, -216
+; CHECK-NEXT:.cfi_offset r16, -208
+; CHECK-NEXT:.cfi_offset r17, -200
+; CHECK-NEXT:.cfi_offset r18, -192
+; CHECK-NEXT:.cfi_offset r19, -184
+; CHECK-NEXT:.cfi_offset r20, -176
+; CHECK-NEXT:.cfi_offset r21, -168
+; CHECK-NEXT:.cfi_offset r22, -160
+; CHECK-NEXT:.cfi_offset r23, -152
+; CHECK-NEXT:.cfi_offset r24, -144
+; CHECK-NEXT:.cfi_offset r25, -136
+; CHECK-NEXT:.cfi_offset r26, -128
+; CHECK-NEXT:.cfi_offset r27, -120
+; CHECK-NEXT:.cfi_offset r28, -112
+; CHECK-NEXT:.cfi_offset r29, -104
+; CHECK-NEXT:.cfi_offset r30, -96
+; CHECK-NEXT:.cfi_offset r31, -88
+; CHECK-NEXT:li 10, 256
+; CHECK-NEXT:cmpwi 7, 1
+; CHECK-NEXT:stw 14, 408(1) # 4-byte Folded Spill
+; CHECK-NEXT:stw 15, 412(1) # 4-byte Folded Spill
+; CHECK-NEXT:stw 16, 416(1) # 4-byte Folded Spill
+; CHECK-NEXT:evstddx 14, 1, 10 # 8-byte Folded Spill
+; CHECK-NEXT:li 10, 264
+; CHECK-NEXT:stw 17, 420(1) # 4-byte Folded Spill
+; CHECK-NEXT:stw 18, 424(1) # 4-byte Folded Spill
+; CHECK-NEXT:stw 19, 428(1) # 4-byte Folded Spill
+; CHECK-NEXT:evstddx 15, 1, 10 # 8-byte Folded Spill
+; CHECK-NEXT:li 10, 272
+; CHECK-NEXT:stw 20, 432(1) # 4-byte Folded Spill
+; CH

[llvm-branch-commits] [llvm] 33e552d - PowerPC: Fix SPE f64 VAARG handling.

Author: Justin Hibbits
Date: 2020-05-12T16:53:26-05:00
New Revision: 33e552d43617201799637faeabfdbe80e9d80a1b

URL: 
https://github.com/llvm/llvm-project/commit/33e552d43617201799637faeabfdbe80e9d80a1b
DIFF: 
https://github.com/llvm/llvm-project/commit/33e552d43617201799637faeabfdbe80e9d80a1b.diff

LOG: PowerPC: Fix SPE f64 VAARG handling.

SPE follows the soft-float ABI for doubles, including VAARG passing.
Under soft-float, doubles are bitcast to i64, but under SPE they are
not, so we need to perform the even-GPR alignment explicitly for SPE f64.
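
As a hedged illustration (hypothetical source, not from the commit), the
affected path is an f64 read through va_arg on SPE:

    #include <cstdarg>
    // Each va_arg(ap, double) is an f64 VAARG node; on SPE the GPR index
    // must first be rounded up to an even register, exactly as for i64.
    double sum(int n, ...) {
      va_list ap;
      va_start(ap, n);
      double s = 0.0;
      for (int i = 0; i < n; ++i)
        s += va_arg(ap, double);
      va_end(ap);
      return s;
    }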

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 




diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 4a733986412c..83a119fec86b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3229,7 +3229,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  VAListPtr, MachinePointerInfo(SV), MVT::i8);
   InChain = GprIndex.getValue(1);
 
-  if (VT == MVT::i64) {
+  if (VT == MVT::i64 || (hasSPE() && VT == MVT::f64)) {
 // Check if GprIndex is even
 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
  DAG.getConstant(1, dl, MVT::i32));



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 619fa4c - PowerPC: Don't hoist float multiply + add to fused operation on SPE

Author: Justin Hibbits
Date: 2020-05-12T16:53:28-05:00
New Revision: 619fa4c9c852d438ec58bd7d4bedc9d1b8691a8f

URL: 
https://github.com/llvm/llvm-project/commit/619fa4c9c852d438ec58bd7d4bedc9d1b8691a8f
DIFF: 
https://github.com/llvm/llvm-project/commit/619fa4c9c852d438ec58bd7d4bedc9d1b8691a8f.diff

LOG: PowerPC: Don't hoist float multiply + add to fused operation on SPE

SPE doesn't have an fmadd instruction, so don't bother fusing a float
multiply-add sequence into one, as it would become just a library call.
The fusion happens too late for the CTR-usability test to veto using the
CTR in a loop, and results in an "Invalid PPC CTR loop!" assertion.
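
A hypothetical example of the pattern the hook gates (a sketch, not from
the commit):

    // With contraction enabled, DAGCombine would fuse this into one FMA
    // node when isFMAFasterThanFMulAndFAdd() returns true; on SPE that
    // FMA has no instruction and would become a libcall, so the hook now
    // returns false and the multiply and add stay separate.
    double madd(double a, double b, double c) {
      return a * b + c; // remains efdmul + efdadd on SPE
    }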

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 




diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 83a119fec86b..2da0665d5123 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15718,6 +15718,8 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
 
 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
Type *Ty) const {
+  if (Subtarget.hasSPE())
+return false;
   switch (Ty->getScalarType()->getTypeID()) {
   case Type::FloatTyID:
   case Type::DoubleTyID:



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libunwind] df0a004 - PowerPCSPE: Stop libunwind from complaining about SPE registers

Author: Justin Hibbits
Date: 2020-05-12T16:53:27-05:00
New Revision: df0a004bf3796a9a2b2d60b2c8f96e996aa36639

URL: 
https://github.com/llvm/llvm-project/commit/df0a004bf3796a9a2b2d60b2c8f96e996aa36639
DIFF: 
https://github.com/llvm/llvm-project/commit/df0a004bf3796a9a2b2d60b2c8f96e996aa36639.diff

LOG: PowerPCSPE: Stop libunwind from complaining about SPE registers

* Match the SPE "DWARF" register numbers to GCC's numbering instead of
  the official DWARF documentation's.
* Raise the highest DWARF register number to 148 from 112, even though
  the upper registers aren't saved (yet).
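
The resulting numbering (read off the enum values below, and stated here
as an assumption about GCC's scheme) is a simple offset:

    // Sketch: GCC assigns DWARF numbers 117..148 to the SPE high halves
    // of r0..r31, so the constant for the high half of rN is:
    constexpr int unwSPEHighReg(int N) { return 117 + N; } // == UNW_PPC_RHn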

Added: 


Modified: 
libunwind/include/__libunwind_config.h
libunwind/include/libunwind.h
llvm/lib/Target/PowerPC/PPCRegisterInfo.td

Removed: 




diff --git a/libunwind/include/__libunwind_config.h b/libunwind/include/__libunwind_config.h
index 71d77ca65118..16bcf20d2ba0 100644
--- a/libunwind/include/__libunwind_config.h
+++ b/libunwind/include/__libunwind_config.h
@@ -16,7 +16,7 @@
 
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86   8
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64 32
-#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC   112
+#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC   148
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC64 116
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64 95
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM   287

diff --git a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h
index 23ef47f4ac83..35181378e0de 100644
--- a/libunwind/include/libunwind.h
+++ b/libunwind/include/libunwind.h
@@ -298,7 +298,39 @@ enum {
   UNW_PPC_VRSAVE  = 109,
   UNW_PPC_VSCR= 110,
   UNW_PPC_SPE_ACC = 111,
-  UNW_PPC_SPEFSCR = 112
+  UNW_PPC_SPEFSCR = 112,
+  UNW_PPC_RH0 = 117,
+  UNW_PPC_RH1 = 118,
+  UNW_PPC_RH2 = 119,
+  UNW_PPC_RH3 = 120,
+  UNW_PPC_RH4 = 121,
+  UNW_PPC_RH5 = 122,
+  UNW_PPC_RH6 = 123,
+  UNW_PPC_RH7 = 124,
+  UNW_PPC_RH8 = 125,
+  UNW_PPC_RH9 = 126,
+  UNW_PPC_RH10 = 127,
+  UNW_PPC_RH11 = 128,
+  UNW_PPC_RH12 = 129,
+  UNW_PPC_RH13 = 130,
+  UNW_PPC_RH14 = 131,
+  UNW_PPC_RH15 = 132,
+  UNW_PPC_RH16 = 133,
+  UNW_PPC_RH17 = 134,
+  UNW_PPC_RH18 = 135,
+  UNW_PPC_RH19 = 136,
+  UNW_PPC_RH20 = 137,
+  UNW_PPC_RH21 = 138,
+  UNW_PPC_RH22 = 139,
+  UNW_PPC_RH23 = 140,
+  UNW_PPC_RH24 = 141,
+  UNW_PPC_RH25 = 142,
+  UNW_PPC_RH26 = 143,
+  UNW_PPC_RH27 = 144,
+  UNW_PPC_RH28 = 145,
+  UNW_PPC_RH29 = 146,
+  UNW_PPC_RH30 = 147,
+  UNW_PPC_RH31 = 148
 };
 
 // 64-bit ppc register numbers

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index b45757c1acc5..c136939a527b 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -115,7 +115,7 @@ foreach Index = 0-31 in {
 // SPE registers
 foreach Index = 0-31 in {
  def S#Index : SPE<!cast<GPR>("R"#Index), "r"#Index>,
-DwarfRegNum<[!add(Index, 1200), !add(Index, 1200)]>;
+DwarfRegNum<[!add(Index, 117), !add(Index, 117)]>;
 }
 
 // Floating-point registers



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] bd71857 - PowerPC: Fix SPE extloadf32 handling.

Author: Justin Hibbits
Date: 2020-05-12T16:53:29-05:00
New Revision: bd718572089afe9f4eb627ec153a2b33d781935f

URL: 
https://github.com/llvm/llvm-project/commit/bd718572089afe9f4eb627ec153a2b33d781935f
DIFF: 
https://github.com/llvm/llvm-project/commit/bd718572089afe9f4eb627ec153a2b33d781935f.diff

LOG: PowerPC: Fix SPE extloadf32 handling.

The patterns were incorrect copies of the FPU code and are unnecessary,
since there is no extending load for SPE.  Just let LLVM do the work by
marking the extending load Expand.
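
A hypothetical source-level trigger (a sketch, not from the commit) is
any f32 load immediately widened to f64:

    // Compiles to an f32->f64 extending load (extloadf32); with the
    // Expand action below, the legalizer splits it into a plain f32 load
    // followed by an fpext instead of matching the removed patterns.
    double widen(const float *p) {
      return *p;
    }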

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrSPE.td
llvm/test/CodeGen/PowerPC/spe.ll

Removed: 




diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2da0665d5123..ac39eb5b71cc 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -323,6 +323,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 setOperationAction(ISD::FMA  , MVT::f32, Legal);
   }
 
+  if (Subtarget.hasSPE())
+setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+
   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 
   // If we're enabling GP optimizations, use hardware square root

diff --git a/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 935c3044ae47..e3bb64d7921e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -819,16 +819,6 @@ def SPESTWX   : XForm_8<31, 151, (outs), (ins spe4rc:$rS, memrr:$dst),
 
 } // HasSPE
 
-let Predicates = [HasSPE] in {
-def : Pat<(f64 (extloadf32 iaddr:$src)),
-  (COPY_TO_REGCLASS (SPELWZ iaddr:$src), SPERC)>;
-def : Pat<(f64 (extloadf32 xaddr:$src)),
-  (COPY_TO_REGCLASS (SPELWZX xaddr:$src), SPERC)>;
-
-def : Pat<(f64 (fpextend f32:$src)),
-  (COPY_TO_REGCLASS $src, SPERC)>;
-}
-
 let Predicates = [HasSPE] in {
 def SELECT_CC_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst),
 (ins crrc:$cond, spe4rc:$T, spe4rc:$F,

diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll
index 10a9571a9877..ab45e144d53b 100644
--- a/llvm/test/CodeGen/PowerPC/spe.ll
+++ b/llvm/test/CodeGen/PowerPC/spe.ll
@@ -1422,3 +1422,64 @@ for.body: ; preds = %for.body, %entry
 declare float @llvm.fma.f32(float, float, float) #1
 
 attributes #1 = { nounwind readnone speculatable willreturn }
+
+%struct.a = type { float, float }
+
+define void @d(%struct.a* %e, %struct.a* %f) {
+; CHECK-LABEL: d:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:mflr 0
+; CHECK-NEXT:stw 0, 4(1)
+; CHECK-NEXT:stwu 1, -48(1)
+; CHECK-NEXT:.cfi_def_cfa_offset 48
+; CHECK-NEXT:.cfi_offset lr, 4
+; CHECK-NEXT:.cfi_offset r29, -12
+; CHECK-NEXT:.cfi_offset r30, -8
+; CHECK-NEXT:.cfi_offset r29, -40
+; CHECK-NEXT:.cfi_offset r30, -32
+; CHECK-NEXT:lwz 4, 0(4)
+; CHECK-NEXT:lwz 3, 0(3)
+; CHECK-NEXT:stw 29, 36(1) # 4-byte Folded Spill
+; CHECK-NEXT:evstdd 29, 8(1) # 8-byte Folded Spill
+; CHECK-NEXT:efdcfs 29, 4
+; CHECK-NEXT:stw 30, 40(1) # 4-byte Folded Spill
+; CHECK-NEXT:evstdd 30, 16(1) # 8-byte Folded Spill
+; CHECK-NEXT:efdcfs 30, 3
+; CHECK-NEXT:evmergehi 3, 29, 29
+; CHECK-NEXT:mr 4, 29
+; CHECK-NEXT:mtctr 3
+; CHECK-NEXT:# kill: def $r3 killed $r3 killed $s3
+; CHECK-NEXT:bctrl
+; CHECK-NEXT:evmergehi 3, 30, 30
+; CHECK-NEXT:mr 4, 30
+; CHECK-NEXT:mtctr 3
+; CHECK-NEXT:# kill: def $r3 killed $r3 killed $s3
+; CHECK-NEXT:bctrl
+; CHECK-NEXT:li 3, .LCPI58_0@l
+; CHECK-NEXT:lis 4, .LCPI58_0@ha
+; CHECK-NEXT:evlddx 3, 4, 3
+; CHECK-NEXT:evldd 30, 16(1) # 8-byte Folded Reload
+; CHECK-NEXT:lwz 30, 40(1) # 4-byte Folded Reload
+; CHECK-NEXT:efdmul 3, 29, 3
+; CHECK-NEXT:efscfd 3, 3
+; CHECK-NEXT:evldd 29, 8(1) # 8-byte Folded Reload
+; CHECK-NEXT:stw 3, 0(3)
+; CHECK-NEXT:lwz 29, 36(1) # 4-byte Folded Reload
+; CHECK-NEXT:lwz 0, 52(1)
+; CHECK-NEXT:addi 1, 1, 48
+; CHECK-NEXT:mtlr 0
+; CHECK-NEXT:blr
+entry:
+  %0 = getelementptr %struct.a, %struct.a* %f, i32 0, i32 0
+  %1 = load float, float* undef
+  %conv = fpext float %1 to double
+  %2 = load float, float* %0
+  %g = fpext float %2 to double
+  %3 = call i32 undef(double %g)
+  %h = call i32 undef(double %conv)
+  %n = sitofp i32 %3 to double
+  %k = fmul double %g, %n
+  %l = fptrunc double %k to float
+  store float %l, float* undef
+  ret void
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] b901070 - [PowerPC] Relax the restrictions on loading doubles with SPE

Author: Justin Hibbits
Date: 2020-05-12T16:53:25-05:00
New Revision: b901070f406129e71f9057357486ef8e1101ad6d

URL: 
https://github.com/llvm/llvm-project/commit/b901070f406129e71f9057357486ef8e1101ad6d
DIFF: 
https://github.com/llvm/llvm-project/commit/b901070f406129e71f9057357486ef8e1101ad6d.diff

LOG: [PowerPC] Relax the restrictions on loading doubles with SPE

The original commit forced all 64-bit values to be loaded with the
indexed [r+r] form, regardless of how close they were to a given base
register.  This relaxes that, and permits some to use an immediate
offset if they fit within a 255-byte window (really 248 bytes, since the
offset must be 8-byte aligned).  Patch by kthomsen.
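
As a sketch of the window test implied above (assuming the unsigned
8-bit, 8-byte-aligned EVLDD/EVSTD offset encoding):

    // Sketch only: an offset can use the immediate form when it is 8-byte
    // aligned and at most 248; anything else (or an offset not known at
    // selection time) falls back to the indexed [r+r] form.
    static bool fitsEVXImmediate(uint64_t Offset) {
      return (Offset % 8) == 0 && Offset <= 248;
    }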

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h

Removed: 




diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 79fbb160e8bf..4a733986412c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2359,19 +2359,42 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
   return isIntS16Immediate(Op.getNode(), Imm);
 }
 
+/// isIntU8Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// zero (unsigned) extension from an 8-bit value.  If so, this returns true and
+/// the immediate.
+bool llvm::isIntU8Immediate(SDNode *N, uint8_t &Imm) {
+  if (!isa<ConstantSDNode>(N))
+    return false;
+  Imm = (uint8_t)cast<ConstantSDNode>(N)->getZExtValue();
+  if (N->getValueType(0) == MVT::i32)
+    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+  else
+    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+bool llvm::isIntU8Immediate(SDValue Op, uint8_t &Imm) {
+  return isIntU8Immediate(Op.getNode(), Imm);
+}
 
-/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
-/// be represented as an indexed [r+r] operation.
+/// SelectAddressEVXRegReg - Given the specified address, check to see if it
+/// must be represented as an indexed [r+r] operation for EVLDD and EVSTD
+/// instructions.  If the address is known now, it will be checked if it fits
+/// into the 8-bit offset, with an alignment of 8.
 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
+  const unsigned EVXEncodingAlignment = 8;
   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
        UI != E; ++UI) {
    if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
   if (Memop->getMemoryVT() == MVT::f64) {
-  Base = N.getOperand(0);
-  Index = N.getOperand(1);
-  return true;
+        uint8_t imm = 0;
+        if (isIntU8Immediate(N.getOperand(1), imm) &&
+            !(imm % EVXEncodingAlignment))
+          return false; // Offset is okay for the 8-bit index
+        Base = N.getOperand(0);
+        Index = N.getOperand(1);
+        return true; // Offset is unknown or too large, so use [r+r]
   }
 }
   }

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 41046df6c043..9235d3f30f4e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1265,6 +1265,9 @@ namespace llvm {
   bool isIntS16Immediate(SDNode *N, int16_t &Imm);
   bool isIntS16Immediate(SDValue Op, int16_t &Imm);
 
+  bool isIntU8Immediate(SDNode *N, uint8_t &Imm);
+  bool isIntU8Immediate(SDValue Op, uint8_t &Imm);
+
 } // end namespace llvm
 
 #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 95dcbfa - PowerPC: Don't lower SELECT_CC to PPCISD::FSEL on SPE

Author: Justin Hibbits
Date: 2020-05-12T16:53:24-05:00
New Revision: 95dcbfa0e5ce701ac20b34f1028a398996df3f34

URL: 
https://github.com/llvm/llvm-project/commit/95dcbfa0e5ce701ac20b34f1028a398996df3f34
DIFF: 
https://github.com/llvm/llvm-project/commit/95dcbfa0e5ce701ac20b34f1028a398996df3f34.diff

LOG: PowerPC: Don't lower SELECT_CC to PPCISD::FSEL on SPE

Summary:
SPE doesn't have an fsel instruction, so don't try to lower to it.

This fixes a "Cannot select: tN: f64 = PPCISD::FSEL tX, tY, tZ" error.
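
A hypothetical trigger (a sketch in the spirit of the added test):

    // With no-NaNs/no-infs fast-math, the select becomes a floating-point
    // SELECT_CC; the FPU would lower it to PPCISD::FSEL, but SPE has no
    // fsel, so LowerSELECT_CC now leaves it alone and it is emitted as a
    // compare and branch (see the efdcmpeq/ble in the test).
    double pick(double a, double b) {
      return a != 0.0 ? a : b;
    }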

Reviewed By: lkail
Differential Revision: https://reviews.llvm.org/D3

Added: 
llvm/test/CodeGen/PowerPC/spe-fastmath.ll

Modified: 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 




diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9d49b3a1e069..79fbb160e8bf 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7980,9 +7980,9 @@ SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
 /// possible.
 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
-  // Not FP? Not a fsel.
+  // Not FP, or using SPE? Not a fsel.
   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
-  !Op.getOperand(2).getValueType().isFloatingPoint())
+  !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
 return Op;
 
   ISD::CondCode CC = cast(Op.getOperand(4))->get();

diff --git a/llvm/test/CodeGen/PowerPC/spe-fastmath.ll b/llvm/test/CodeGen/PowerPC/spe-fastmath.ll
new file mode 100644
index ..d2b83f7ee1da
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/spe-fastmath.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \
+; RUN:   -mattr=+spe |  FileCheck %s
+
+define void @no_fsel(i32 %e) #0 {
+; CHECK-LABEL: no_fsel:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li 4, .LCPI0_0@l
+; CHECK-NEXT:lis 5, .LCPI0_0@ha
+; CHECK-NEXT:evlddx 4, 5, 4
+; CHECK-NEXT:efdcfui 3, 3
+; CHECK-NEXT:efdmul 5, 3, 3
+; CHECK-NEXT:efdcmpeq 0, 5, 4
+; CHECK-NEXT:ble 0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:evor 3, 4, 4
+; CHECK-NEXT:  .LBB0_2: # %entry
+; CHECK-NEXT:efdctsiz 3, 3
+; CHECK-NEXT:sth 3, 0(3)
+; CHECK-NEXT:blr
+entry:
+  %conv = uitofp i32 %e to double
+  %mul = fmul double %conv, %conv
+  %tobool = fcmp une double %mul, 0.00e+00
+  %cond = select i1 %tobool, double %conv, double 0.00e+00
+  %conv3 = fptosi double %cond to i16
+  store i16 %conv3, i16* undef
+  ret void
+}
+
+attributes #0 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits