https://github.com/azhan92 updated https://github.com/llvm/llvm-project/pull/99511
>From 52100e3378c86eddb9a4757f8f3e175804a6dc76 Mon Sep 17 00:00:00 2001 From: Alison Zhang <alisonzh...@ibm.com> Date: Thu, 18 Jul 2024 10:27:40 -0400 Subject: [PATCH 1/2] Add support for -mcpu=pwr11 / -mtune=pwr11 --- clang/lib/Basic/Targets/PPC.cpp | 39 ++++++++++++------- clang/lib/Basic/Targets/PPC.h | 19 ++++++--- clang/lib/Driver/ToolChains/Arch/PPC.cpp | 3 ++ clang/test/Misc/target-invalid-cpu-note.c | 2 +- clang/test/Preprocessor/init-ppc64.c | 20 ++++++++++ llvm/lib/Target/PowerPC/PPC.td | 21 ++++++++-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1 + llvm/lib/Target/PowerPC/PPCSubtarget.h | 1 + .../Target/PowerPC/PPCTargetTransformInfo.cpp | 4 +- llvm/lib/TargetParser/Host.cpp | 1 + 10 files changed, 86 insertions(+), 25 deletions(-) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 4ba4a49311d36..9ff54083c923b 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -385,6 +385,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("_ARCH_PWR9"); if (ArchDefs & ArchDefinePwr10) Builder.defineMacro("_ARCH_PWR10"); + if (ArchDefs & ArchDefinePwr11) + Builder.defineMacro("_ARCH_PWR11"); if (ArchDefs & ArchDefineA2) Builder.defineMacro("_ARCH_A2"); if (ArchDefs & ArchDefineE500) @@ -622,10 +624,17 @@ bool PPCTargetInfo::initFeatureMap( addP10SpecificFeatures(Features); } - // Future CPU should include all of the features of Power 10 as well as any + // Power11 includes all the same features as Power10 plus any features + // specific to the Power11 core. + if (CPU == "pwr11" || CPU == "power11") { + initFeatureMap(Features, Diags, "pwr10", FeaturesVec); + addP11SpecificFeatures(Features); + } + + // Future CPU should include all of the features of Power 11 as well as any // additional features (yet to be determined) specific to it. 
if (CPU == "future") { - initFeatureMap(Features, Diags, "pwr10", FeaturesVec); + initFeatureMap(Features, Diags, "pwr11", FeaturesVec); addFutureSpecificFeatures(Features); } @@ -696,6 +705,10 @@ void PPCTargetInfo::addP10SpecificFeatures( Features["isa-v31-instructions"] = true; } +// Add any Power11 specific features. +void PPCTargetInfo::addP11SpecificFeatures( + llvm::StringMap<bool> &Features) const {} + // Add features specific to the "Future" CPU. void PPCTargetInfo::addFutureSpecificFeatures( llvm::StringMap<bool> &Features) const {} @@ -870,17 +883,17 @@ ArrayRef<TargetInfo::AddlRegName> PPCTargetInfo::getGCCAddlRegNames() const { } static constexpr llvm::StringLiteral ValidCPUNames[] = { - {"generic"}, {"440"}, {"450"}, {"601"}, {"602"}, - {"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"}, - {"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"}, - {"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"}, - {"g5"}, {"a2"}, {"e500"}, {"e500mc"}, {"e5500"}, - {"power3"}, {"pwr3"}, {"power4"}, {"pwr4"}, {"power5"}, - {"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"}, {"pwr6"}, - {"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"}, {"power8"}, - {"pwr8"}, {"power9"}, {"pwr9"}, {"power10"}, {"pwr10"}, - {"powerpc"}, {"ppc"}, {"ppc32"}, {"powerpc64"}, {"ppc64"}, - {"powerpc64le"}, {"ppc64le"}, {"future"}}; + {"generic"}, {"440"}, {"450"}, {"601"}, {"602"}, + {"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"}, + {"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"}, + {"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"}, + {"g5"}, {"a2"}, {"e500"}, {"e500mc"}, {"e5500"}, + {"power3"}, {"pwr3"}, {"power4"}, {"pwr4"}, {"power5"}, + {"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"}, {"pwr6"}, + {"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"}, {"power8"}, + {"pwr8"}, {"power9"}, {"pwr9"}, {"power10"}, {"pwr10"}, + {"power11"}, {"pwr11"}, {"powerpc"}, {"ppc"}, {"ppc32"}, + {"powerpc64"}, {"ppc64"}, {"powerpc64le"}, {"ppc64le"}, {"future"}}; bool PPCTargetInfo::isValidCPUName(StringRef Name) const { return 
llvm::is_contained(ValidCPUNames, Name); diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index b15ab6fbcf492..6d5d8dd54d013 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -44,8 +44,9 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { ArchDefinePwr8 = 1 << 12, ArchDefinePwr9 = 1 << 13, ArchDefinePwr10 = 1 << 14, - ArchDefineFuture = 1 << 15, - ArchDefineA2 = 1 << 16, + ArchDefinePwr11 = 1 << 15, + ArchDefineFuture = 1 << 16, + ArchDefineA2 = 1 << 17, ArchDefineE500 = 1 << 18 } ArchDefineTypes; @@ -166,11 +167,16 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { ArchDefinePwr7 | ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq) + .Cases("power11", "pwr11", + ArchDefinePwr11 | ArchDefinePwr10 | ArchDefinePwr9 | + ArchDefinePwr8 | ArchDefinePwr7 | ArchDefinePwr6 | + ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | + ArchDefinePpcgr | ArchDefinePpcsq) .Case("future", - ArchDefineFuture | ArchDefinePwr10 | ArchDefinePwr9 | - ArchDefinePwr8 | ArchDefinePwr7 | ArchDefinePwr6 | - ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | - ArchDefinePpcgr | ArchDefinePpcsq) + ArchDefineFuture | ArchDefinePwr11 | ArchDefinePwr10 | + ArchDefinePwr9 | ArchDefinePwr8 | ArchDefinePwr7 | + ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 | + ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq) .Cases("8548", "e500", ArchDefineE500) .Default(ArchDefineNone); } @@ -192,6 +198,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { const std::vector<std::string> &FeaturesVec) const override; void addP10SpecificFeatures(llvm::StringMap<bool> &Features) const; + void addP11SpecificFeatures(llvm::StringMap<bool> &Features) const; void addFutureSpecificFeatures(llvm::StringMap<bool> &Features) const; bool handleTargetFeatures(std::vector<std::string> &Features, diff --git a/clang/lib/Driver/ToolChains/Arch/PPC.cpp 
b/clang/lib/Driver/ToolChains/Arch/PPC.cpp index 634c096523319..acd5757d6ea97 100644 --- a/clang/lib/Driver/ToolChains/Arch/PPC.cpp +++ b/clang/lib/Driver/ToolChains/Arch/PPC.cpp @@ -70,6 +70,7 @@ static std::string normalizeCPUName(StringRef CPUName, const llvm::Triple &T) { .Case("power8", "pwr8") .Case("power9", "pwr9") .Case("power10", "pwr10") + .Case("power11", "pwr11") .Case("future", "future") .Case("powerpc", "ppc") .Case("powerpc64", "ppc64") @@ -103,6 +104,8 @@ const char *ppc::getPPCAsmModeForCPU(StringRef Name) { .Case("power9", "-mpower9") .Case("pwr10", "-mpower10") .Case("power10", "-mpower10") + .Case("pwr11", "-mpower11") + .Case("power11", "-mpower11") .Default("-many"); } diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index a5f9ffa21220a..4d6759dd81537 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -57,7 +57,7 @@ // RUN: not %clang_cc1 -triple powerpc--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix PPC // PPC: error: unknown target CPU 'not-a-cpu' -// PPC-NEXT: note: valid target CPU values are: generic, 440, 450, 601, 602, 603, 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750, 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4, pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, powerpc, ppc, ppc32, powerpc64, ppc64, powerpc64le, ppc64le, future{{$}} +// PPC-NEXT: note: valid target CPU values are: generic, 440, 450, 601, 602, 603, 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750, 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4, pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, power11, pwr11, powerpc, ppc, ppc32, powerpc64, ppc64, powerpc64le, ppc64le, future{{$}} // RUN: not %clang_cc1 -triple mips--- 
-target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix MIPS // MIPS: error: unknown target CPU 'not-a-cpu' diff --git a/clang/test/Preprocessor/init-ppc64.c b/clang/test/Preprocessor/init-ppc64.c index 42e5232824de7..a53397226b8d5 100644 --- a/clang/test/Preprocessor/init-ppc64.c +++ b/clang/test/Preprocessor/init-ppc64.c @@ -632,6 +632,25 @@ // PPCPOWER10:#define __PCREL__ 1 // PPCPOWER10-NOT:#define __ROP_PROTECT__ 1 // +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu pwr11 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCPOWER11 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu power11 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCPOWER11 %s +// +// PPCPOWER11:#define _ARCH_PPC 1 +// PPCPOWER11:#define _ARCH_PPC64 1 +// PPCPOWER11:#define _ARCH_PPCGR 1 +// PPCPOWER11:#define _ARCH_PPCSQ 1 +// PPCPOWER11:#define _ARCH_PWR10 1 +// PPCPOWER11:#define _ARCH_PWR11 1 +// PPCPOWER11:#define _ARCH_PWR4 1 +// PPCPOWER11:#define _ARCH_PWR5 1 +// PPCPOWER11:#define _ARCH_PWR5X 1 +// PPCPOWER11:#define _ARCH_PWR6 1 +// PPCPOWER11-NOT:#define _ARCH_PWR6X 1 +// PPCPOWER11:#define _ARCH_PWR7 1 +// PPCPOWER11:#define _ARCH_PWR8 1 +// PPCPOWER11:#define _ARCH_PWR9 1 +// PPCPOWER11-NOT:#define __ROP_PROTECT__ 1 +// // RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu future -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCFUTURE %s // // PPCFUTURE:#define _ARCH_PPC 1 @@ -639,6 +658,7 @@ // PPCFUTURE:#define _ARCH_PPCGR 1 // PPCFUTURE:#define _ARCH_PPCSQ 1 // PPCFUTURE:#define _ARCH_PWR10 1 +// PPCFUTURE:#define _ARCH_PWR11 1 // PPCFUTURE:#define _ARCH_PWR4 1 // PPCFUTURE:#define _ARCH_PWR5 1 // PPCFUTURE:#define _ARCH_PWR5X 1 diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 84ef582c029d3..41d2ee328b8b8 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ 
b/llvm/lib/Target/PowerPC/PPC.td @@ -52,6 +52,7 @@ def DirectivePwr7: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR7", "">; def DirectivePwr8: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR8", "">; def DirectivePwr9: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR9", "">; def DirectivePwr10: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR10", "">; +def DirectivePwr11: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR11", "">; def DirectivePwrFuture : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR_FUTURE", "">; @@ -467,13 +468,25 @@ def ProcessorFeatures { list<SubtargetFeature> P10Features = !listconcat(P10InheritableFeatures, P10SpecificFeatures); - // Future - // For future CPU we assume that all of the existing features from Power10 + // Power11 + // For P11 CPU we assume that all the existing features from Power10 // still exist with the exception of those we know are Power10 specific. + list<SubtargetFeature> P11AdditionalFeatures = + [DirectivePwr11]; + list<SubtargetFeature> P11SpecificFeatures = + []; + list<SubtargetFeature> P11InheritableFeatures = + !listconcat(P10InheritableFeatures, P11AdditionalFeatures); + list<SubtargetFeature> P11Features = + !listconcat(P11InheritableFeatures, P11SpecificFeatures); + + // Future + // For future CPU we assume that all of the existing features from Power11 + // still exist with the exception of those we know are Power11 specific. 
list<SubtargetFeature> FutureAdditionalFeatures = [FeatureISAFuture]; list<SubtargetFeature> FutureSpecificFeatures = []; list<SubtargetFeature> FutureInheritableFeatures = - !listconcat(P10InheritableFeatures, FutureAdditionalFeatures); + !listconcat(P11InheritableFeatures, FutureAdditionalFeatures); list<SubtargetFeature> FutureFeatures = !listconcat(FutureInheritableFeatures, FutureSpecificFeatures); } @@ -672,6 +685,8 @@ def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>; def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>; def : ProcessorModel<"pwr10", P10Model, ProcessorFeatures.P10Features>; +// No scheduler model yet. +def : ProcessorModel<"pwr11", NoSchedModel, ProcessorFeatures.P11Features>; // No scheduler model for future CPU. def : ProcessorModel<"future", NoSchedModel, ProcessorFeatures.FutureFeatures>; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index a11ab93b8db3c..0520d75fda31e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1469,6 +1469,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, case PPC::DIR_PWR8: case PPC::DIR_PWR9: case PPC::DIR_PWR10: + case PPC::DIR_PWR11: case PPC::DIR_PWR_FUTURE: setPrefLoopAlignment(Align(16)); setPrefFunctionAlignment(Align(16)); diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index bf35f8ec151b1..2079dc0acc3cf 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -61,6 +61,7 @@ enum { DIR_PWR8, DIR_PWR9, DIR_PWR10, + DIR_PWR11, DIR_PWR_FUTURE, DIR_64 }; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 3fa35efc2d159..b7bdbeb535d52 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ 
b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -504,7 +504,7 @@ unsigned PPCTTIImpl::getCacheLineSize() const { // Assume that Future CPU has the same cache line size as the others. if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 || - Directive == PPC::DIR_PWR_FUTURE) + Directive == PPC::DIR_PWR11 || Directive == PPC::DIR_PWR_FUTURE) return 128; // On other processors return a default of 64 bytes. @@ -538,7 +538,7 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(ElementCount VF) { // Assume that future is the same as the others. if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 || - Directive == PPC::DIR_PWR_FUTURE) + Directive == PPC::DIR_PWR11 || Directive == PPC::DIR_PWR_FUTURE) return 12; // For most things, modern systems have two execution units (and diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 82c1731f58f0a..5d85386e5b359 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -150,6 +150,7 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { .Case("POWER8NVL", "pwr8") .Case("POWER9", "pwr9") .Case("POWER10", "pwr10") + .Case("POWER11", "pwr11") // FIXME: If we get a simulator or machine with the capabilities of // mcpu=future, we should revisit this and add the name reported by the // simulator/machine. 
>From e55375b12aa1da16d27d8d888d707669041303f6 Mon Sep 17 00:00:00 2001 From: Alison Zhang <alisonzh...@ibm.com> Date: Fri, 19 Jul 2024 10:19:09 -0500 Subject: [PATCH 2/2] Suggestions from reviewer --- llvm/lib/Target/PowerPC/PPC.td | 3 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 + llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 3 +- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 24 +- llvm/lib/TargetParser/Host.cpp | 6 + llvm/test/CodeGen/PowerPC/check-cpu.ll | 6 +- ...{mma-acc-spill.ll => mma-acc-spill-p10.ll} | 0 .../test/CodeGen/PowerPC/mma-acc-spill-p11.ll | 122 ++++++ llvm/test/CodeGen/PowerPC/p11-constants.ll | 392 ++++++++++++++++++ llvm/unittests/TargetParser/Host.cpp | 1 + 10 files changed, 551 insertions(+), 8 deletions(-) rename llvm/test/CodeGen/PowerPC/{mma-acc-spill.ll => mma-acc-spill-p10.ll} (100%) create mode 100644 llvm/test/CodeGen/PowerPC/mma-acc-spill-p11.ll create mode 100644 llvm/test/CodeGen/PowerPC/p11-constants.ll diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 41d2ee328b8b8..da31a993b9c69 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -685,8 +685,7 @@ def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>; def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>; def : ProcessorModel<"pwr10", P10Model, ProcessorFeatures.P10Features>; -// No scheduler model yet. -def : ProcessorModel<"pwr11", NoSchedModel, ProcessorFeatures.P11Features>; +def : ProcessorModel<"pwr11", P10Model, ProcessorFeatures.P11Features>; // No scheduler model for future CPU. 
def : ProcessorModel<"future", NoSchedModel, ProcessorFeatures.FutureFeatures>; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 0520d75fda31e..258dc907fb2b0 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16665,6 +16665,7 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { case PPC::DIR_PWR8: case PPC::DIR_PWR9: case PPC::DIR_PWR10: + case PPC::DIR_PWR11: case PPC::DIR_PWR_FUTURE: { if (!ML) break; @@ -18047,6 +18048,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const { return true; case PPC::DIR_PWR9: case PPC::DIR_PWR10: + case PPC::DIR_PWR11: case PPC::DIR_PWR_FUTURE: // type mul add shl // scalar 5 2 2 diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 2d3c520429f2a..c8d8f97231514 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -3485,7 +3485,8 @@ unsigned PPCInstrInfo::getSpillTarget() const { // With P10, we may need to spill paired vector registers or accumulator // registers. MMA implies paired vectors, so we can just check that. bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops(); - return Subtarget.isISAFuture() ? 3 : IsP10Variant ? + // P11 uses the P10 target (2); index 3 (Pwr11 tables) is never returned. + return Subtarget.isISAFuture() ? 4 : IsP10Variant ? 2 : Subtarget.hasP9Vector() ? 
1 : 0; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 1e2687f92c61e..ea34b459251f4 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -124,6 +124,14 @@ enum PPCMachineCombinerPattern : unsigned { PPC::RESTORE_UACC, NoInstr, NoInstr, PPC::RESTORE_QUADWORD \ } +#define Pwr11LoadOpcodes \ + { \ + PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ + PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \ + PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \ + PPC::RESTORE_UACC, NoInstr, NoInstr, PPC::RESTORE_QUADWORD \ + } + #define FutureLoadOpcodes \ { \ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ @@ -156,6 +164,14 @@ enum PPCMachineCombinerPattern : unsigned { NoInstr, NoInstr, PPC::SPILL_QUADWORD \ } +#define Pwr11StoreOpcodes \ + { \ + PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ + PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \ + PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \ + NoInstr, NoInstr, PPC::SPILL_QUADWORD \ + } + #define FutureStoreOpcodes \ { \ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ @@ -166,17 +182,17 @@ enum PPCMachineCombinerPattern : unsigned { // Initialize arrays for load and store spill opcodes on supported subtargets. 
#define StoreOpcodesForSpill \ - { Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes, FutureStoreOpcodes } + { Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes, Pwr11StoreOpcodes, FutureStoreOpcodes } #define LoadOpcodesForSpill \ - { Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes, FutureLoadOpcodes } + { Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes, Pwr11LoadOpcodes, FutureLoadOpcodes } class PPCSubtarget; class PPCInstrInfo : public PPCGenInstrInfo { PPCSubtarget &Subtarget; const PPCRegisterInfo RI; - const unsigned StoreSpillOpcodesArray[4][SOK_LastOpcodeSpill] = + const unsigned StoreSpillOpcodesArray[5][SOK_LastOpcodeSpill] = StoreOpcodesForSpill; - const unsigned LoadSpillOpcodesArray[4][SOK_LastOpcodeSpill] = + const unsigned LoadSpillOpcodesArray[5][SOK_LastOpcodeSpill] = LoadOpcodesForSpill; void StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 5d85386e5b359..db9183dafee22 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1550,6 +1550,12 @@ StringRef sys::getHostCPUName() { case 0x40000: #endif return "pwr10"; +#ifdef POWER_11 + case POWER_11: +#else + case 0x80000: +#endif + return "pwr11"; default: return "generic"; } diff --git a/llvm/test/CodeGen/PowerPC/check-cpu.ll b/llvm/test/CodeGen/PowerPC/check-cpu.ll index e1a201427a410..1dc532cb428f4 100644 --- a/llvm/test/CodeGen/PowerPC/check-cpu.ll +++ b/llvm/test/CodeGen/PowerPC/check-cpu.ll @@ -3,6 +3,10 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=future < %s 2>&1 | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr11 < %s 2>&1 | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr11 < %s 2>&1 | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 < 
%s 2>&1 | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 < %s 2>&1 | FileCheck %s @@ -13,7 +17,7 @@ -; Test -mcpu=[pwr9|pwr10|future] is recognized on PowerPC. +; Test -mcpu=[pwr9|pwr10|pwr11|future] is recognized on PowerPC. ; CHECK-NOT: is not a recognized processor for this target ; CHECK: .text diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill-p10.ll similarity index 100% rename from llvm/test/CodeGen/PowerPC/mma-acc-spill.ll rename to llvm/test/CodeGen/PowerPC/mma-acc-spill-p10.ll diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill-p11.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill-p11.ll new file mode 100644 index 0000000000000..8bf609a84aa02 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill-p11.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; ; This test is a copy of mma-acc-spill-p10.ll except that it uses mcpu=pwr11. 
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE + +declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>) +declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) +declare void @foo() +define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, ptr %ptr) { +; CHECK-LABEL: intrinsics1: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -176(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 176 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: .cfi_offset v28, -80 +; CHECK-NEXT: .cfi_offset v29, -64 +; CHECK-NEXT: .cfi_offset v30, -48 +; CHECK-NEXT: .cfi_offset v31, -32 +; CHECK-NEXT: stxv v28, 96(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v29, 112(r1) # 16-byte Folded Spill +; CHECK-NEXT: vmr v29, v3 +; CHECK-NEXT: vmr v28, v2 +; CHECK-NEXT: xxlor vs0, v28, v28 +; CHECK-NEXT: stxv v30, 128(r1) # 16-byte Folded Spill +; CHECK-NEXT: stxv v31, 144(r1) # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v5 +; CHECK-NEXT: vmr v30, v4 +; CHECK-NEXT: xxlor vs1, v29, v29 +; CHECK-NEXT: xxlor vs2, v30, v30 +; CHECK-NEXT: xxlor vs3, v31, v31 +; CHECK-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; CHECK-NEXT: ld r30, 272(r1) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvf16ger2pp acc0, v2, v4 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxvp vsp0, 64(r1) +; CHECK-NEXT: stxvp vsp2, 32(r1) +; CHECK-NEXT: bl foo@notoc +; CHECK-NEXT: lxvp vsp0, 64(r1) +; CHECK-NEXT: lxvp vsp2, 32(r1) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvf16ger2pp acc0, v28, v30 +; 
CHECK-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload +; CHECK-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload +; CHECK-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload +; CHECK-NEXT: lxv v28, 96(r1) # 16-byte Folded Reload +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r30) +; CHECK-NEXT: stxv vs1, 32(r30) +; CHECK-NEXT: stxv vs2, 16(r30) +; CHECK-NEXT: stxv vs3, 0(r30) +; CHECK-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 176 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: intrinsics1: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: mflr r0 +; CHECK-BE-NEXT: std r0, 16(r1) +; CHECK-BE-NEXT: stdu r1, -256(r1) +; CHECK-BE-NEXT: .cfi_def_cfa_offset 256 +; CHECK-BE-NEXT: .cfi_offset lr, 16 +; CHECK-BE-NEXT: .cfi_offset r30, -16 +; CHECK-BE-NEXT: .cfi_offset v28, -80 +; CHECK-BE-NEXT: .cfi_offset v29, -64 +; CHECK-BE-NEXT: .cfi_offset v30, -48 +; CHECK-BE-NEXT: .cfi_offset v31, -32 +; CHECK-BE-NEXT: stxv v28, 176(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v29, 192(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: vmr v29, v3 +; CHECK-BE-NEXT: vmr v28, v2 +; CHECK-BE-NEXT: xxlor vs0, v28, v28 +; CHECK-BE-NEXT: stxv v30, 208(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: stxv v31, 224(r1) # 16-byte Folded Spill +; CHECK-BE-NEXT: vmr v31, v5 +; CHECK-BE-NEXT: vmr v30, v4 +; CHECK-BE-NEXT: xxlor vs1, v29, v29 +; CHECK-BE-NEXT: xxlor vs2, v30, v30 +; CHECK-BE-NEXT: xxlor vs3, v31, v31 +; CHECK-BE-NEXT: std r30, 240(r1) # 8-byte Folded Spill +; CHECK-BE-NEXT: ld r30, 368(r1) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxvp vsp0, 112(r1) +; CHECK-BE-NEXT: stxvp vsp2, 144(r1) +; CHECK-BE-NEXT: bl foo +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: lxvp vsp0, 112(r1) +; CHECK-BE-NEXT: lxvp vsp2, 144(r1) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvf16ger2pp acc0, v28, v30 +; CHECK-BE-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload 
+; CHECK-BE-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload +; CHECK-BE-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload +; CHECK-BE-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r30) +; CHECK-BE-NEXT: stxv vs0, 0(r30) +; CHECK-BE-NEXT: stxv vs3, 48(r30) +; CHECK-BE-NEXT: stxv vs2, 32(r30) +; CHECK-BE-NEXT: ld r30, 240(r1) # 8-byte Folded Reload +; CHECK-BE-NEXT: addi r1, r1, 256 +; CHECK-BE-NEXT: ld r0, 16(r1) +; CHECK-BE-NEXT: mtlr r0 +; CHECK-BE-NEXT: blr + %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4) + %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc3) + tail call void @foo() + %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3) + store <512 x i1> %3, ptr %ptr, align 64 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/p11-constants.ll b/llvm/test/CodeGen/PowerPC/p11-constants.ll new file mode 100644 index 0000000000000..f1ca6401c8791 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/p11-constants.ll @@ -0,0 +1,392 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK32 + +; These test cases aim to test constant materialization using the pli instruction on Power11. 
+ +define signext i32 @t_16BitsMinRequiring34Bits() { +; CHECK-LABEL: t_16BitsMinRequiring34Bits: +; CHECK: pli r3, 32768 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_16BitsMinRequiring34Bits: +; CHECK32: pli r3, 32768 +; CHECK32-NEXT: blr + +entry: + ret i32 32768 +} + +define signext i32 @t_16Bits() { +; CHECK-LABEL: t_16Bits: +; CHECK: pli r3, 62004 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_16Bits: +; CHECK32: pli r3, 62004 +; CHECK32-NEXT: blr + +entry: + ret i32 62004 +} + +define signext i32 @t_lt32gt16BitsNonShiftable() { +; CHECK-LABEL: t_lt32gt16BitsNonShiftable: +; CHECK: pli r3, 1193046 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_lt32gt16BitsNonShiftable: +; CHECK32: pli r3, 1193046 +; CHECK32-NEXT: blr + +entry: + ret i32 1193046 +} + +define signext i32 @t_32Bits() { +; CHECK-LABEL: t_32Bits: +; CHECK: pli r3, -231451016 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_32Bits: +; CHECK32: pli r3, -231451016 +; CHECK32-NEXT: blr + +entry: + ret i32 -231451016 +} + +define i64 @t_34BitsLargestPositive() { +; CHECK-LABEL: t_34BitsLargestPositive: +; CHECK: pli r3, 8589934591 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_34BitsLargestPositive: +; CHECK32: li r3, 1 +; CHECK32-NEXT: li r4, -1 +; CHECK32-NEXT: blr + +entry: + ret i64 8589934591 +} + +define i64 @t_neg34Bits() { +; CHECK-LABEL: t_neg34Bits: +; CHECK: pli r3, -8284514696 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_neg34Bits: +; CHECK32: li r3, -2 +; CHECK32-NEXT: pli r4, 305419896 +; CHECK32-NEXT: blr + +entry: + ret i64 -8284514696 +} + +define signext i32 @t_16BitsMinRequiring34BitsMinusOne() { +; CHECK-LABEL: t_16BitsMinRequiring34BitsMinusOne: +; CHECK: li r3, 32767 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_16BitsMinRequiring34BitsMinusOne: +; CHECK32: li r3, 32767 +; CHECK32-NEXT: blr + +entry: + ret i32 32767 +} + +define signext i32 @t_lt16Bits() { +; CHECK-LABEL: t_lt16Bits: +; CHECK: li r3, 291 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_lt16Bits: +; CHECK32: li r3, 291 +; CHECK32-NEXT: blr + +entry: + ret i32 291 +} + +define 
signext i32 @t_neglt16Bits() { +; CHECK-LABEL: t_neglt16Bits: +; CHECK: li r3, -3805 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_neglt16Bits: +; CHECK32: li r3, -3805 +; CHECK32-NEXT: blr + +entry: + ret i32 -3805 +} + +define signext i32 @t_neg16Bits() { +; CHECK-LABEL: t_neg16Bits: +; CHECK: li r3, -32204 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_neg16Bits: +; CHECK32: li r3, -32204 +; CHECK32-NEXT: blr + +entry: + ret i32 -32204 +} + +define signext i32 @t_lt32gt16BitsShiftable() { +; CHECK-LABEL: t_lt32gt16BitsShiftable: +; CHECK: lis r3, 18 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_lt32gt16BitsShiftable: +; CHECK32: lis r3, 18 +; CHECK32-NEXT: blr + +entry: + ret i32 1179648 +} + +define signext i32 @t_32gt16BitsShiftable() { +; CHECK-LABEL: t_32gt16BitsShiftable: +; CHECK: lis r3, -3532 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_32gt16BitsShiftable: +; CHECK32: lis r3, -3532 +; CHECK32-NEXT: blr + +entry: + ret i32 -231473152 +} + +define signext i32 @t_32BitsZero() { +; CHECK-LABEL: t_32BitsZero: +; CHECK: li r3, 0 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_32BitsZero: +; CHECK32: li r3, 0 +; CHECK32-NEXT: blr + +entry: + ret i32 0 +} + +define signext i32 @t_32BitsAllOnes() { +; CHECK-LABEL: t_32BitsAllOnes: +; CHECK: li r3, -1 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_32BitsAllOnes: +; CHECK32: li r3, -1 +; CHECK32-NEXT: blr + +entry: + ret i32 -1 +} + +define i64 @t_34BitsLargestPositivePlus() { +; CHECK-LABEL: t_34BitsLargestPositivePlus: +; CHECK: li r3, 1 +; CHECK-NEXT: rldic r3, r3, 33, 30 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_34BitsLargestPositivePlus: +; CHECK32: li r3, 2 +; CHECK32-NEXT: li r4, 0 +; CHECK32-NEXT: blr + +entry: + ret i64 8589934592 +} + +define i64 @t_34Bits() { +; CHECK-LABEL: t_34Bits: +; CHECK: pli r3, 1648790223 +; CHECK-NEXT: rldic r3, r3, 3, 30 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_34Bits: +; CHECK32: li r3, 3 +; CHECK32-NEXT: pli r4, 305419896 +; CHECK32-NEXT: blr + +entry: + ret i64 13190321784 +} + +define i64 @t_35Bits() { +; CHECK-LABEL: 
t_35Bits: +; CHECK: pli r3, 4266035919 +; CHECK-NEXT: rldic r3, r3, 3, 29 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_35Bits: +; CHECK32: li r3, 7 +; CHECK32-NEXT: pli r4, -231451016 +; CHECK32-NEXT: blr + +entry: + ret i64 34128287352 +} + +; (Value >> Shift) can be expressed in 34 bits +define i64 @t_Shift() { +; CHECK-LABEL: t_Shift: +; CHECK: pli r3, 8522759166 +; CHECK-NEXT: rotldi r3, r3, 48 +; CHECK-NEXT: blr + +entry: + ; 0xFBFE00000001FBFE + ret i64 18157950747604548606 +} + +; Leading Zeros + Following Ones + Trailing Zeros > 30 +define i64 @t_LZFOTZ() { +; CHECK-LABEL: t_LZFOTZ: +; CHECK: pli r3, -349233 +; CHECK-NEXT: rldic r3, r3, 4, 12 +; CHECK-NEXT: blr + +entry: + ; 0x000FFFFFFFAABCF0 + ret i64 4503599621782768 +} + +; Leading Zeros + Trailing Ones > 30 +define i64 @t_LZTO() { +; CHECK-LABEL: t_LZTO: +; CHECK: pli r3, -2684406441 +; CHECK-NEXT: rldicl r3, r3, 11, 19 +; CHECK-NEXT: blr +entry: + ; 0x00001AFFF9AABFFF + ret i64 29686707699711 +} + +; Leading Zeros + Trailing Ones + Following Ones > 30 +define i64 @t_LZTOFO() { +; CHECK-LABEL: t_LZTOFO: +; CHECK: pli r3, -5720033968 +; CHECK-NEXT: rldicl r3, r3, 11, 12 +; CHECK-NEXT: blr +entry: + ; 0x000FF55879AA87FF + ret i64 4491884997806079 +} + +; Requires full expansion +define i64 @t_Full64Bits1() { +; CHECK-LABEL: t_Full64Bits1: +; CHECK: pli r4, 2146500607 +; CHECK-NEXT: pli r3, 4043305214 +; CHECK-NEXT: rldimi r3, r4, 32, 0 +; CHECK-NEXT: blr +entry: + ; 0x7FF0FFFFF0FFF0FE + ret i64 9219149911952453886 +} + +; Requires full expansion +define i64 @t_Ful64Bits2() { +; CHECK-LABEL: t_Ful64Bits2: +; CHECK: pli r4, 4042326015 +; CHECK-NEXT: pli r3, 4043305214 +; CHECK-NEXT: rldimi r3, r4, 32, 0 +; CHECK-NEXT: blr +entry: + ; 0xF0F0FFFFF0FFF0FE + ret i64 17361658038238310654 +} + +; A splat of 32 bits: 32 Bits Low == 32 Bits High +define i64 @t_Splat32Bits() { +; CHECK-LABEL: t_Splat32Bits: +; CHECK: pli r3, 262916796 +; CHECK-NEXT: rldimi r3, r3, 32, 0 +; CHECK-NEXT: blr +entry: + ; 0x0FABCABC0FABCABC 
+ ret i64 1129219040652020412 +} + +; Producing `pli` when the constant fits within 34-bits and the constant +; is being produced in other transformations (such as complex bit permutations). +define i64 @t_34Bits_Complex(i64 %a, i64 %b) { +; CHECK-LABEL: t_34Bits_Complex: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: rotldi r4, r4, 30 +; CHECK-NEXT: rldimi r3, r4, 34, 31 +; CHECK-NEXT: pli r4, -268435457 +; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: blr +; +; CHECK32-LABEL: t_34Bits_Complex: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: rlwinm r4, r6, 0, 4, 2 +; CHECK32-NEXT: rlwimi r3, r5, 0, 31, 29 +; CHECK32-NEXT: blr +entry: + %and = and i64 %a, 8589934592 + %and1 = and i64 %b, -8858370049 + %or = or i64 %and1, %and + ret i64 %or +} + +; The load immediates resulting from phi-nodes are needed to test whether +; li/lis is preferred to pli by the instruction selector. +define dso_local void @t_phiNode() { +; CHECK-LABEL: t_phiNode: +; CHECK: lis r6, 18 +; CHECK-NEXT: li r5, 291 +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: cmpwi r3, 1 +; CHECK-NEXT: li r3, -1 +; CHECK: pli r6, 2147483647 +; CHECK-NEXT: pli r5, 1193046 +; CHECK-NEXT: pli r4, 32768 +; CHECK-NEXT: pli r3, -231451016 +; CHECK32-LABEL: t_phiNode: +; CHECK32: lis r6, 18 +; CHECK32-NEXT: li r5, 291 +; CHECK32-NEXT: li r4, 0 +; CHECK32-NEXT: cmpwi r3, 1 +; CHECK32-NEXT: li r3, -1 +; CHECK32: pli r6, 2147483647 +; CHECK32-NEXT: pli r5, 1193046 +; CHECK32-NEXT: pli r4, 32768 +; CHECK32-NEXT: pli r3, -231451016 + +entry: + br label %while.body + +while.body: ; preds = %if.else.i, %entry + br label %while.body.i + +while.body.i: ; preds = %sw.epilog.i, %while.body + %a.1.i = phi i32 [ %a.2.i, %sw.epilog.i ], [ -1, %while.body ] + %b.1.i = phi i32 [ %b.2.i, %sw.epilog.i ], [ 0, %while.body ] + %c.1.i = phi i32 [ %c.2.i, %sw.epilog.i ], [ 291, %while.body ] + %d.1.i = phi i32 [ %d.2.i, %sw.epilog.i ], [ 1179648, %while.body ] + %0 = load i8, ptr null, align 1 + %cmp1.i = icmp eq i8 %0, 1 + br i1 %cmp1.i, label 
%if.then.i, label %if.else.i + +if.then.i: ; preds = %while.body.i + switch i8 undef, label %sw.default.i [ + i8 3, label %sw.epilog.i + i8 2, label %sw.bb1.i + ] + +sw.bb1.i: ; preds = %if.then.i + br label %sw.epilog.i + +sw.default.i: ; preds = %if.then.i + unreachable + +sw.epilog.i: ; preds = %sw.bb1.i, %if.then.i + %a.2.i = phi i32 [ -231451016, %sw.bb1.i ], [ %a.1.i, %if.then.i ] + %b.2.i = phi i32 [ 32768, %sw.bb1.i ], [ %b.1.i, %if.then.i ] + %c.2.i = phi i32 [ 1193046, %sw.bb1.i ], [ %c.1.i, %if.then.i ] + %d.2.i = phi i32 [ 2147483647, %sw.bb1.i ], [ %d.1.i, %if.then.i ] + br label %while.body.i + +if.else.i: ; preds = %while.body.i + call void @func2(i32 signext %a.1.i, i32 signext %b.1.i, i32 signext %c.1.i, i32 signext %d.1.i) + br label %while.body +} + +declare void @func2(i32, i32, i32, i32) diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp index 61921a99e1711..c85be8476c5b6 100644 --- a/llvm/unittests/TargetParser/Host.cpp +++ b/llvm/unittests/TargetParser/Host.cpp @@ -536,6 +536,7 @@ TEST(HostTest, AIXHostCPUDetect) { .Case("POWER 8\n", "pwr8") .Case("POWER 9\n", "pwr9") .Case("POWER 10\n", "pwr10") + .Case("POWER 11\n", "pwr11") .Default("unknown"); StringRef HostCPU = sys::getHostCPUName(); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits