[llvm-branch-commits] [llvm] a772082 - PowerPC: Treat llvm.fma.f* intrinsic as using CTR with SPE
Author: Justin Hibbits
Date: 2020-05-12T16:53:23-05:00
New Revision: a772082fc1c7aa043175bb34a9363d3c119a7930

URL: https://github.com/llvm/llvm-project/commit/a772082fc1c7aa043175bb34a9363d3c119a7930
DIFF: https://github.com/llvm/llvm-project/commit/a772082fc1c7aa043175bb34a9363d3c119a7930.diff

LOG: PowerPC: Treat llvm.fma.f* intrinsic as using CTR with SPE

Summary:
SPE doesn't have an 'fma' instruction, so the intrinsic becomes a
libcall. It really should become an expansion to two instructions, but
for some reason the compiler doesn't think that's as optimal as a
branch. Since this lowering is done after CTR is allocated for loops,
tell the optimizer that CTR may be used in this case. This prevents an
"Invalid PPC CTR loop!" assertion when an fma() function call is used
in a C/C++ file and clang converts it into the intrinsic.

Reviewed By: shchenz

Differential Revision: https://reviews.llvm.org/D78668

Added: 

Modified: 
    llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
    llvm/test/CodeGen/PowerPC/spe.ll

Removed: 


diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 698e2d48b53a..68a1d4321dd6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -319,6 +319,7 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
         return true;
       else
         continue; // ISD::FCOPYSIGN is never a library call.
+      case Intrinsic::fma:      Opcode = ISD::FMA;      break;
       case Intrinsic::sqrt:     Opcode = ISD::FSQRT;    break;
      case Intrinsic::floor:    Opcode = ISD::FFLOOR;   break;
      case Intrinsic::ceil:     Opcode = ISD::FCEIL;    break;

diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll
index 8a39566d0870..10a9571a9877 100644
--- a/llvm/test/CodeGen/PowerPC/spe.ll
+++ b/llvm/test/CodeGen/PowerPC/spe.ll
@@ -1355,3 +1355,70 @@ return:
   ret double %1
 }
+
+define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fma:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    stw 0, 4(1)
+; CHECK-NEXT:    stwu 1, -48(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 4
+; CHECK-NEXT:    .cfi_offset r29, -12
+; CHECK-NEXT:    .cfi_offset r30, -8
+; CHECK-NEXT:    .cfi_offset r29, -40
+; CHECK-NEXT:    .cfi_offset r30, -32
+; CHECK-NEXT:    cmpwi 3, 1
+; CHECK-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
+; CHECK-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
+; CHECK-NEXT:    evstdd 29, 8(1) # 8-byte Folded Spill
+; CHECK-NEXT:    evstdd 30, 16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    blt 0, .LBB57_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    mr 30, 3
+; CHECK-NEXT:    li 29, 0
+; CHECK-NEXT:    # implicit-def: $r5
+; CHECK-NEXT:  .LBB57_2: # %for.body
+; CHECK-NEXT:    #
+; CHECK-NEXT:    efscfsi 3, 29
+; CHECK-NEXT:    mr 4, 3
+; CHECK-NEXT:    bl fmaf
+; CHECK-NEXT:    addi 29, 29, 1
+; CHECK-NEXT:    cmplw 30, 29
+; CHECK-NEXT:    mr 5, 3
+; CHECK-NEXT:    bne 0, .LBB57_2
+; CHECK-NEXT:    b .LBB57_4
+; CHECK-NEXT:  .LBB57_3:
+; CHECK-NEXT:    # implicit-def: $r5
+; CHECK-NEXT:  .LBB57_4: # %for.cond.cleanup
+; CHECK-NEXT:    evldd 30, 16(1) # 8-byte Folded Reload
+; CHECK-NEXT:    evldd 29, 8(1) # 8-byte Folded Reload
+; CHECK-NEXT:    mr 3, 5
+; CHECK-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
+; CHECK-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
+; CHECK-NEXT:    lwz 0, 52(1)
+; CHECK-NEXT:    addi 1, 1, 48
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+entry:
+  %cmp8 = icmp sgt i32 %d, 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  %e.0.lcssa = phi float [ undef, %entry ], [ %0, %for.body ]
+  ret float %e.0.lcssa
+
+for.body:                                         ; preds = %for.body, %entry
+  %f.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %e.09 = phi float [ %0, %for.body ], [ undef, %entry ]
+  %conv = sitofp i32 %f.010 to float
+  %0 = tail call float @llvm.fma.f32(float %conv, float %conv, float %e.09)
+  %inc = add nuw nsw i32 %f.010, 1
+  %exitcond = icmp eq i32 %inc, %d
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare float @llvm.fma.f32(float, float, float) #1
+
+attributes #1 = { nounwind readnone speculatable willreturn }
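The assertion above is easy to trigger from plain C/C++. A minimal reproducer sketch follows; the file name and build flags are assumptions, not from the commit. Compiled for an SPE target at -O2, clang recognizes the fmaf() call and turns it into @llvm.fma.f32, and a loop body containing the resulting libcall cannot become a CTR hardware loop.

// repro.cpp -- hypothetical reproducer; flags are an assumption:
//   clang++ --target=powerpc-unknown-linux-gnu -mcpu=e500 -mspe -O2 -c repro.cpp
#include <cmath>

float test_fma(int d) {
  float e = 0.0f;
  // A counted loop like this is a CTR-loop candidate; the fmaf() below
  // becomes @llvm.fma.f32, which SPE lowers to a call to fmaf, and a
  // loop containing a call must not be converted to a CTR loop.
  for (int f = 0; f < d; ++f)
    e = std::fmaf(float(f), float(f), e);
  return e;
}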
[llvm-branch-commits] [llvm] e82f0d9 - PowerPC: Add emergency stack spill slots for SPE
Author: Justin Hibbits
Date: 2020-05-12T16:53:30-05:00
New Revision: e82f0d991fc01a8e509f4d6c2f5139af2d29f913

URL: https://github.com/llvm/llvm-project/commit/e82f0d991fc01a8e509f4d6c2f5139af2d29f913
DIFF: https://github.com/llvm/llvm-project/commit/e82f0d991fc01a8e509f4d6c2f5139af2d29f913.diff

LOG: PowerPC: Add emergency stack spill slots for SPE

The powerpcspe 64-bit load/store only allows an 8-bit delta (32 64-bit
words), so if the stack size is any larger than that, we need extra
spill slots for doing indexing.

Added: 
    llvm/test/CodeGen/PowerPC/spe-spills.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCFrameLowering.cpp

Removed: 


diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 7fbdf97793f7..bef9bcbdfaeb 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -2062,7 +2062,8 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
   unsigned StackSize = determineFrameLayout(MF, true);
   MachineFrameInfo &MFI = MF.getFrameInfo();
   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
-      hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
+      hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize)) ||
+      (Subtarget.hasSPE() && hasSpills(MF) && !isInt<8>(StackSize))) {
     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;

diff --git a/llvm/test/CodeGen/PowerPC/spe-spills.ll b/llvm/test/CodeGen/PowerPC/spe-spills.ll
new file mode 100644
index 000000000000..8fb0f2278fff
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/spe-spills.ll
@@ -0,0 +1,721 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \
+; RUN:   -mattr=+spe | FileCheck %s
+; Tests that spill slots are allocated for stacks larger than 256 bytes on
+; powerpcspe targets
+
+@d = local_unnamed_addr global double* null, align 4
+@c = local_unnamed_addr global i32 0, align 4
+@g = local_unnamed_addr global double 0.000000e+00, align 8
+@e = local_unnamed_addr global double* null, align 4
+@h = local_unnamed_addr global double 0.000000e+00, align 8
+@j = local_unnamed_addr global double 0.000000e+00, align 8
+@f = local_unnamed_addr global double 0.000000e+00, align 8
+@a = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: nofree norecurse nounwind sspstrong uwtable
+define i32 @k(double* nocapture readonly %l, double* nocapture %m, i32 %aa, i32 %ab, i32 %n, i32 %ac, i32 %ad) local_unnamed_addr #0 {
+; CHECK-LABEL: k:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stwu 1, -480(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 480
+; CHECK-NEXT:    .cfi_offset r14, -72
+; CHECK-NEXT:    .cfi_offset r15, -68
+; CHECK-NEXT:    .cfi_offset r16, -64
+; CHECK-NEXT:    .cfi_offset r17, -60
+; CHECK-NEXT:    .cfi_offset r18, -56
+; CHECK-NEXT:    .cfi_offset r19, -52
+; CHECK-NEXT:    .cfi_offset r20, -48
+; CHECK-NEXT:    .cfi_offset r21, -44
+; CHECK-NEXT:    .cfi_offset r22, -40
+; CHECK-NEXT:    .cfi_offset r23, -36
+; CHECK-NEXT:    .cfi_offset r24, -32
+; CHECK-NEXT:    .cfi_offset r25, -28
+; CHECK-NEXT:    .cfi_offset r26, -24
+; CHECK-NEXT:    .cfi_offset r27, -20
+; CHECK-NEXT:    .cfi_offset r28, -16
+; CHECK-NEXT:    .cfi_offset r29, -12
+; CHECK-NEXT:    .cfi_offset r30, -8
+; CHECK-NEXT:    .cfi_offset r31, -4
+; CHECK-NEXT:    .cfi_offset r14, -224
+; CHECK-NEXT:    .cfi_offset r15, -216
+; CHECK-NEXT:    .cfi_offset r16, -208
+; CHECK-NEXT:    .cfi_offset r17, -200
+; CHECK-NEXT:    .cfi_offset r18, -192
+; CHECK-NEXT:    .cfi_offset r19, -184
+; CHECK-NEXT:    .cfi_offset r20, -176
+; CHECK-NEXT:    .cfi_offset r21, -168
+; CHECK-NEXT:    .cfi_offset r22, -160
+; CHECK-NEXT:    .cfi_offset r23, -152
+; CHECK-NEXT:    .cfi_offset r24, -144
+; CHECK-NEXT:    .cfi_offset r25, -136
+; CHECK-NEXT:    .cfi_offset r26, -128
+; CHECK-NEXT:    .cfi_offset r27, -120
+; CHECK-NEXT:    .cfi_offset r28, -112
+; CHECK-NEXT:    .cfi_offset r29, -104
+; CHECK-NEXT:    .cfi_offset r30, -96
+; CHECK-NEXT:    .cfi_offset r31, -88
+; CHECK-NEXT:    li 10, 256
+; CHECK-NEXT:    cmpwi 7, 1
+; CHECK-NEXT:    stw 14, 408(1) # 4-byte Folded Spill
+; CHECK-NEXT:    stw 15, 412(1) # 4-byte Folded Spill
+; CHECK-NEXT:    stw 16, 416(1) # 4-byte Folded Spill
+; CHECK-NEXT:    evstddx 14, 1, 10 # 8-byte Folded Spill
+; CHECK-NEXT:    li 10, 264
+; CHECK-NEXT:    stw 17, 420(1) # 4-byte Folded Spill
+; CHECK-NEXT:    stw 18, 424(1) # 4-byte Folded Spill
+; CHECK-NEXT:    stw 19, 428(1) # 4-byte Folded Spill
+; CHECK-NEXT:    evstddx 15, 1, 10 # 8-byte Folded Spill
+; CHECK-NEXT:    li 10, 272
+; CHECK-NEXT:    stw 20, 432(1) # 4-byte Folded Spill
+; CH
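The new disjunct in addScavengingSpillSlot carries the whole fix. A standalone sketch of the predicate, with a hand-rolled stand-in for llvm::isInt<8> (the function names here are illustrative, not LLVM's):

#include <cstdint>

// Stand-in for llvm::isInt<8>: does the value fit in a signed 8-bit int?
static bool fitsInSigned8(int64_t x) { return x >= -128 && x <= 127; }

// evldd/evstd reach only a small window from the base register, so an
// SPE function with spills and a frame larger than that window needs an
// emergency slot the register scavenger can use while materializing an
// indexed address.
bool needsSpeScavengingSlot(bool hasSPE, bool hasSpills, int64_t stackSize) {
  return hasSPE && hasSpills && !fitsInSigned8(stackSize);
}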
[llvm-branch-commits] [llvm] 33e552d - PowerPC: Fix SPE f64 VAARG handling.
Author: Justin Hibbits
Date: 2020-05-12T16:53:26-05:00
New Revision: 33e552d43617201799637faeabfdbe80e9d80a1b

URL: https://github.com/llvm/llvm-project/commit/33e552d43617201799637faeabfdbe80e9d80a1b
DIFF: https://github.com/llvm/llvm-project/commit/33e552d43617201799637faeabfdbe80e9d80a1b.diff

LOG: PowerPC: Fix SPE f64 VAARG handling.

SPE follows the soft-float ABI for doubles, including for VAARG
passing. For soft-float, doubles are bitcast to i64, but for SPE they
are not, so we need to perform the GPR alignment explicitly for SPE
f64.

Added: 

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 


diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 4a733986412c..83a119fec86b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3229,7 +3229,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
                                 VAListPtr, MachinePointerInfo(SV), MVT::i8);
   InChain = GprIndex.getValue(1);
 
-  if (VT == MVT::i64) {
+  if (VT == MVT::i64 || (hasSPE() && VT == MVT::f64)) {
     // Check if GprIndex is even
     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                  DAG.getConstant(1, dl, MVT::i32));
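The alignment being extended to SPE f64 reduces to rounding the va_arg GPR index up to an even register before consuming a pair. A sketch in plain integer arithmetic (the real code builds the equivalent AND/ADD/select SelectionDAG nodes on the va_list's GPR counter):

// 64-bit values, i64 and now f64 under SPE, are passed in an even/odd
// GPR pair, so an odd index must first be rounded up to the next even
// register.
unsigned alignGprIndexForPair(unsigned gprIndex) {
  return (gprIndex + 1) & ~1u; // 3 -> 4, 4 -> 4, 5 -> 6, ...
}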
[llvm-branch-commits] [llvm] 619fa4c - PowerPC: Don't hoist float multiply + add to fused operation on SPE
Author: Justin Hibbits
Date: 2020-05-12T16:53:28-05:00
New Revision: 619fa4c9c852d438ec58bd7d4bedc9d1b8691a8f

URL: https://github.com/llvm/llvm-project/commit/619fa4c9c852d438ec58bd7d4bedc9d1b8691a8f
DIFF: https://github.com/llvm/llvm-project/commit/619fa4c9c852d438ec58bd7d4bedc9d1b8691a8f.diff

LOG: PowerPC: Don't hoist float multiply + add to fused operation on SPE

SPE doesn't have an fmadd instruction, so don't bother hoisting a
multiply and add sequence to one, as it would become just a library
call. Hoisting happens too late for the CTR-usability test to veto
using the CTR in a loop, and results in an "Invalid PPC CTR loop!"
assertion.

Added: 

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 


diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 83a119fec86b..2da0665d5123 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15718,6 +15718,8 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
 
 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
                                                    Type *Ty) const {
+  if (Subtarget.hasSPE())
+    return false;
   switch (Ty->getScalarType()->getTypeID()) {
   case Type::FloatTyID:
   case Type::DoubleTyID:
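The hook's contract, roughly: return true only when a fused multiply-add is at least as cheap as the separate multiply and add. A minimal model of the decision after this patch (illustrative names and signature, not LLVM's):

enum class ScalarFPType { Float, Double, Other };

// With SPE there is no fmadd instruction, so "fusing" fmul+fadd would
// trade two cheap instructions for an fma libcall.
bool fmaFasterThanMulAndAdd(bool hasSPE, ScalarFPType ty) {
  if (hasSPE)
    return false;
  return ty == ScalarFPType::Float || ty == ScalarFPType::Double;
}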
[llvm-branch-commits] [libunwind] df0a004 - PowerPCSPE: Stop libunwind from complaining about SPE registers
Author: Justin Hibbits
Date: 2020-05-12T16:53:27-05:00
New Revision: df0a004bf3796a9a2b2d60b2c8f96e996aa36639

URL: https://github.com/llvm/llvm-project/commit/df0a004bf3796a9a2b2d60b2c8f96e996aa36639
DIFF: https://github.com/llvm/llvm-project/commit/df0a004bf3796a9a2b2d60b2c8f96e996aa36639.diff

LOG: PowerPCSPE: Stop libunwind from complaining about SPE registers

* Match SPE "DWARF" register numbers to GCC's instead of the official
  DWARF documentation.
* Increase the register count to 148 from 112, even though the upper
  registers aren't saved (yet).

Added: 

Modified: 
    libunwind/include/__libunwind_config.h
    libunwind/include/libunwind.h
    llvm/lib/Target/PowerPC/PPCRegisterInfo.td

Removed: 


diff --git a/libunwind/include/__libunwind_config.h b/libunwind/include/__libunwind_config.h
index 71d77ca65118..16bcf20d2ba0 100644
--- a/libunwind/include/__libunwind_config.h
+++ b/libunwind/include/__libunwind_config.h
@@ -16,7 +16,7 @@
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86       8
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_X86_64    32
-#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC       112
+#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC       148
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_PPC64     116
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64     95
 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM       287

diff --git a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h
index 23ef47f4ac83..35181378e0de 100644
--- a/libunwind/include/libunwind.h
+++ b/libunwind/include/libunwind.h
@@ -298,7 +298,39 @@ enum {
   UNW_PPC_VRSAVE  = 109,
   UNW_PPC_VSCR    = 110,
   UNW_PPC_SPE_ACC = 111,
-  UNW_PPC_SPEFSCR = 112
+  UNW_PPC_SPEFSCR = 112,
+  UNW_PPC_RH0     = 117,
+  UNW_PPC_RH1     = 118,
+  UNW_PPC_RH2     = 119,
+  UNW_PPC_RH3     = 120,
+  UNW_PPC_RH4     = 121,
+  UNW_PPC_RH5     = 122,
+  UNW_PPC_RH6     = 123,
+  UNW_PPC_RH7     = 124,
+  UNW_PPC_RH8     = 125,
+  UNW_PPC_RH9     = 126,
+  UNW_PPC_RH10    = 127,
+  UNW_PPC_RH11    = 128,
+  UNW_PPC_RH12    = 129,
+  UNW_PPC_RH13    = 130,
+  UNW_PPC_RH14    = 131,
+  UNW_PPC_RH15    = 132,
+  UNW_PPC_RH16    = 133,
+  UNW_PPC_RH17    = 134,
+  UNW_PPC_RH18    = 135,
+  UNW_PPC_RH19    = 136,
+  UNW_PPC_RH20    = 137,
+  UNW_PPC_RH21    = 138,
+  UNW_PPC_RH22    = 139,
+  UNW_PPC_RH23    = 140,
+  UNW_PPC_RH24    = 141,
+  UNW_PPC_RH25    = 142,
+  UNW_PPC_RH26    = 143,
+  UNW_PPC_RH27    = 144,
+  UNW_PPC_RH28    = 145,
+  UNW_PPC_RH29    = 146,
+  UNW_PPC_RH30    = 147,
+  UNW_PPC_RH31    = 148
 };
 
 // 64-bit ppc register numbers

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index b45757c1acc5..c136939a527b 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -115,7 +115,7 @@ foreach Index = 0-31 in {
 // SPE registers
 foreach Index = 0-31 in {
   def S#Index : SPE<!cast<R32>("R"#Index), "r"#Index>,
-                DwarfRegNum<[!add(Index, 1200), !add(Index, 1200)]>;
+                DwarfRegNum<[!add(Index, 117), !add(Index, 117)]>;
 }
 
 // Floating-point registers
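The new constants follow one linear rule; the helper below simply restates the numbering and is not part of the libunwind API (names are illustrative):

// GCC-style DWARF number for the high 32 bits of SPE register r<n>:
// UNW_PPC_RH0 == 117 ... UNW_PPC_RH31 == 148.
constexpr int speHighPartDwarfNum(int n) { return 117 + n; }

static_assert(speHighPartDwarfNum(0) == 117, "matches UNW_PPC_RH0");
static_assert(speHighPartDwarfNum(31) == 148, "matches UNW_PPC_RH31");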
[llvm-branch-commits] [llvm] bd71857 - PowerPC: Fix SPE extloadf32 handling.
Author: Justin Hibbits
Date: 2020-05-12T16:53:29-05:00
New Revision: bd718572089afe9f4eb627ec153a2b33d781935f

URL: https://github.com/llvm/llvm-project/commit/bd718572089afe9f4eb627ec153a2b33d781935f
DIFF: https://github.com/llvm/llvm-project/commit/bd718572089afe9f4eb627ec153a2b33d781935f.diff

LOG: PowerPC: Fix SPE extloadf32 handling.

The patterns were incorrect copies from the FPU code, and are
unnecessary, since there is no extending load for SPE. Just let LLVM
itself do the work by marking it Expand.

Added: 

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrSPE.td
    llvm/test/CodeGen/PowerPC/spe.ll

Removed: 


diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2da0665d5123..ac39eb5b71cc 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -323,6 +323,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::FMA  , MVT::f32, Legal);
   }
 
+  if (Subtarget.hasSPE())
+    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+
   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 
   // If we're enabling GP optimizations, use hardware square root

diff --git a/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 935c3044ae47..e3bb64d7921e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -819,16 +819,6 @@ def SPESTWX    : XForm_8<31, 151, (outs), (ins spe4rc:$rS, memrr:$dst),
 
 } // HasSPE
 
-let Predicates = [HasSPE] in {
-def : Pat<(f64 (extloadf32 iaddr:$src)),
-          (COPY_TO_REGCLASS (SPELWZ iaddr:$src), SPERC)>;
-def : Pat<(f64 (extloadf32 xaddr:$src)),
-          (COPY_TO_REGCLASS (SPELWZX xaddr:$src), SPERC)>;
-
-def : Pat<(f64 (fpextend f32:$src)),
-          (COPY_TO_REGCLASS $src, SPERC)>;
-}
-
 let Predicates = [HasSPE] in {
 def SELECT_CC_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst),
                         (ins crrc:$cond, spe4rc:$T, spe4rc:$F,

diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll
index 10a9571a9877..ab45e144d53b 100644
--- a/llvm/test/CodeGen/PowerPC/spe.ll
+++ b/llvm/test/CodeGen/PowerPC/spe.ll
@@ -1422,3 +1422,64 @@ for.body:                                         ; preds = %for.body, %entry
 declare float @llvm.fma.f32(float, float, float) #1
 
 attributes #1 = { nounwind readnone speculatable willreturn }
+
+%struct.a = type { float, float }
+
+define void @d(%struct.a* %e, %struct.a* %f) {
+; CHECK-LABEL: d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    stw 0, 4(1)
+; CHECK-NEXT:    stwu 1, -48(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 4
+; CHECK-NEXT:    .cfi_offset r29, -12
+; CHECK-NEXT:    .cfi_offset r30, -8
+; CHECK-NEXT:    .cfi_offset r29, -40
+; CHECK-NEXT:    .cfi_offset r30, -32
+; CHECK-NEXT:    lwz 4, 0(4)
+; CHECK-NEXT:    lwz 3, 0(3)
+; CHECK-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
+; CHECK-NEXT:    evstdd 29, 8(1) # 8-byte Folded Spill
+; CHECK-NEXT:    efdcfs 29, 4
+; CHECK-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
+; CHECK-NEXT:    evstdd 30, 16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    efdcfs 30, 3
+; CHECK-NEXT:    evmergehi 3, 29, 29
+; CHECK-NEXT:    mr 4, 29
+; CHECK-NEXT:    mtctr 3
+; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; CHECK-NEXT:    bctrl
+; CHECK-NEXT:    evmergehi 3, 30, 30
+; CHECK-NEXT:    mr 4, 30
+; CHECK-NEXT:    mtctr 3
+; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; CHECK-NEXT:    bctrl
+; CHECK-NEXT:    li 3, .LCPI58_0@l
+; CHECK-NEXT:    lis 4, .LCPI58_0@ha
+; CHECK-NEXT:    evlddx 3, 4, 3
+; CHECK-NEXT:    evldd 30, 16(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
+; CHECK-NEXT:    efdmul 3, 29, 3
+; CHECK-NEXT:    efscfd 3, 3
+; CHECK-NEXT:    evldd 29, 8(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stw 3, 0(3)
+; CHECK-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
+; CHECK-NEXT:    lwz 0, 52(1)
+; CHECK-NEXT:    addi 1, 1, 48
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+entry:
+  %0 = getelementptr %struct.a, %struct.a* %f, i32 0, i32 0
+  %1 = load float, float* undef
+  %conv = fpext float %1 to double
+  %2 = load float, float* %0
+  %g = fpext float %2 to double
+  %3 = call i32 undef(double %g)
+  %h = call i32 undef(double %conv)
+  %n = sitofp i32 %3 to double
+  %k = fmul double %g, %n
+  %l = fptrunc double %k to float
+  store float %l, float* undef
+  ret void
+}
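Marking the extending load Expand makes the legalizer split it into an ordinary f32 load followed by a separate fp_extend, which SPE implements with efdcfs, exactly the lwz/efdcfs pairs visible in the CHECK lines above. Expressed as source (a sketch of the resulting semantics, not the lowering code itself):

// An f32 -> f64 "extending load" on SPE: a plain 32-bit load (lwz),
// then an explicit single-to-double conversion (efdcfs).
double loadAndExtend(const float *p) {
  float f = *p;                  // plain f32 load
  return static_cast<double>(f); // fp_extend -> efdcfs
}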
[llvm-branch-commits] [llvm] b901070 - [PowerPC] Relax the restrictions on loading doubles with SPE
Author: Justin Hibbits
Date: 2020-05-12T16:53:25-05:00
New Revision: b901070f406129e71f9057357486ef8e1101ad6d

URL: https://github.com/llvm/llvm-project/commit/b901070f406129e71f9057357486ef8e1101ad6d
DIFF: https://github.com/llvm/llvm-project/commit/b901070f406129e71f9057357486ef8e1101ad6d.diff

LOG: [PowerPC] Relax the restrictions on loading doubles with SPE

The original commit forced all 64-bit values to be loaded with indexed
[r+r] addressing, regardless of how close they were located to a given
base register. This relaxes that, and permits some to be
immediate-indexed if they fit within an unsigned 255-byte window
(really 248, given the 8-byte alignment mask).

Patch by kthomsen.

Added: 

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h

Removed: 


diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 79fbb160e8bf..4a733986412c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2359,19 +2359,42 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
   return isIntS16Immediate(Op.getNode(), Imm);
 }
 
+/// isIntU8Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// zero (unsigned) extension from an 8-bit value. If so, this returns true and
+/// the immediate.
+bool llvm::isIntU8Immediate(SDNode *N, uint8_t &Imm) {
+  if (!isa<ConstantSDNode>(N))
+    return false;
+  Imm = (uint8_t)cast<ConstantSDNode>(N)->getZExtValue();
+  if (N->getValueType(0) == MVT::i32)
+    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+  else
+    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+bool llvm::isIntU8Immediate(SDValue Op, uint8_t &Imm) {
+  return isIntU8Immediate(Op.getNode(), Imm);
+}
 
-/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
-/// be represented as an indexed [r+r] operation.
+/// SelectAddressEVXRegReg - Given the specified address, check to see if it
+/// must be represented as an indexed [r+r] operation for EVLDD and EVSTD
+/// instructions. If the address is known now, it will be checked if it fits
+/// into the 8-bit offset, with an alignment of 8.
 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
+  const unsigned EVXEncodingAlignment = 8;
   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
        UI != E; ++UI) {
     if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
       if (Memop->getMemoryVT() == MVT::f64) {
-        Base = N.getOperand(0);
-        Index = N.getOperand(1);
-        return true;
+        uint8_t imm = 0;
+        if (isIntU8Immediate(N.getOperand(1), imm) &&
+            !(imm % EVXEncodingAlignment))
+          return false; // Offset is okay for the 8-bit index
+        Base = N.getOperand(0);
+        Index = N.getOperand(1);
+        return true; // Offset is unknown or too large, so use [r+r]
       }
     }
   }

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 41046df6c043..9235d3f30f4e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1265,6 +1265,9 @@ namespace llvm {
   bool isIntS16Immediate(SDNode *N, int16_t &Imm);
   bool isIntS16Immediate(SDValue Op, int16_t &Imm);
 
+  bool isIntU8Immediate(SDNode *N, uint8_t &Imm);
+  bool isIntU8Immediate(SDValue Op, uint8_t &Imm);
+
 } // end namespace llvm
 
 #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
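The encoding fact behind the check: EVLDD/EVSTD carry an unsigned 8-bit byte offset that must be 8-byte aligned, so only displacements 0, 8, ..., 248 fit the immediate form. A standalone version of the test (assumed name; it mirrors the isIntU8Immediate-plus-alignment logic above):

#include <cstdint>

// True if a known displacement can be encoded directly in EVLDD/EVSTD:
// it must fit in 8 unsigned bits and be a multiple of 8 (0..248).
bool fitsEvxImmediateForm(int64_t offset) {
  return offset >= 0 && offset <= 248 && offset % 8 == 0;
}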
[llvm-branch-commits] [llvm] 95dcbfa - PowerPC: Don't lower SELECT_CC to PPCISD::FSEL on SPE
Author: Justin Hibbits
Date: 2020-05-12T16:53:24-05:00
New Revision: 95dcbfa0e5ce701ac20b34f1028a398996df3f34

URL: https://github.com/llvm/llvm-project/commit/95dcbfa0e5ce701ac20b34f1028a398996df3f34
DIFF: https://github.com/llvm/llvm-project/commit/95dcbfa0e5ce701ac20b34f1028a398996df3f34.diff

LOG: PowerPC: Don't lower SELECT_CC to PPCISD::FSEL on SPE

Summary:
SPE doesn't have an fsel instruction, so don't try to lower to it. This
fixes a "Cannot select: tN: f64 = PPCISD::FSEL tX, tY, tZ" error.

Reviewed By: lkail

Differential Revision: https://reviews.llvm.org/D3

Added: 
    llvm/test/CodeGen/PowerPC/spe-fastmath.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 


diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9d49b3a1e069..79fbb160e8bf 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7980,9 +7980,9 @@ SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
 /// possible.
 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
-  // Not FP? Not a fsel.
+  // Not FP, or using SPE? Not a fsel.
   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
-      !Op.getOperand(2).getValueType().isFloatingPoint())
+      !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
     return Op;
 
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

diff --git a/llvm/test/CodeGen/PowerPC/spe-fastmath.ll b/llvm/test/CodeGen/PowerPC/spe-fastmath.ll
new file mode 100644
index 000000000000..d2b83f7ee1da
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/spe-fastmath.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \
+; RUN:   -mattr=+spe | FileCheck %s
+
+define void @no_fsel(i32 %e) #0 {
+; CHECK-LABEL: no_fsel:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 4, .LCPI0_0@l
+; CHECK-NEXT:    lis 5, .LCPI0_0@ha
+; CHECK-NEXT:    evlddx 4, 5, 4
+; CHECK-NEXT:    efdcfui 3, 3
+; CHECK-NEXT:    efdmul 5, 3, 3
+; CHECK-NEXT:    efdcmpeq 0, 5, 4
+; CHECK-NEXT:    ble 0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    evor 3, 4, 4
+; CHECK-NEXT:  .LBB0_2: # %entry
+; CHECK-NEXT:    efdctsiz 3, 3
+; CHECK-NEXT:    sth 3, 0(3)
+; CHECK-NEXT:    blr
+entry:
+  %conv = uitofp i32 %e to double
+  %mul = fmul double %conv, %conv
+  %tobool = fcmp une double %mul, 0.000000e+00
+  %cond = select i1 %tobool, double %conv, double 0.000000e+00
+  %conv3 = fptosi double %cond to i16
+  store i16 %conv3, i16* undef
+  ret void
+}
+
+attributes #0 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" }
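For context on what is being avoided: fsel computes a floating-point select, roughly FRT = (FRA >= 0.0) ? FRC : FRB, an instruction SPE hardware simply lacks, hence the efdcmpeq/branch sequence in the test above. A hedged C++ rendering of the semantics (glossing over fsel's NaN and -0.0 details):

// Model of 'fsel FRT, FRA, FRC, FRB' (ignoring NaN/-0.0 subtleties).
double fselModel(double a, double c, double b) {
  return a >= 0.0 ? c : b;
}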