Author: Sjoerd Meijer
Date: 2021-01-25T09:11:29Z
New Revision: 815dd4b2920887741f905c5922e5bbf935348cce
URL: https://github.com/llvm/llvm-project/commit/815dd4b2920887741f905c5922e5bbf935348cce DIFF: https://github.com/llvm/llvm-project/commit/815dd4b2920887741f905c5922e5bbf935348cce.diff LOG: [AArch64] Add Cortex CPU subtarget features for instruction fusion. This adds subtarget features for AES, literal, and compare and branch instruction fusion for different Cortex CPUs. Patch by: Cassie Jones. Differential Revision: https://reviews.llvm.org/D94457 Added: Modified: llvm/lib/Target/AArch64/AArch64.td llvm/lib/Target/AArch64/AArch64MacroFusion.cpp llvm/lib/Target/AArch64/AArch64Subtarget.h llvm/test/CodeGen/AArch64/misched-fusion-addr.ll llvm/test/CodeGen/AArch64/misched-fusion-aes.ll llvm/test/CodeGen/AArch64/misched-fusion-lit.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 15c7130b24f3..762855207d2b 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -218,6 +218,10 @@ def FeatureArithmeticCbzFusion : SubtargetFeature< "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", "CPU fuses arithmetic + cbz/cbnz operations">; +def FeatureCmpBccFusion : SubtargetFeature< + "cmp-bcc-fusion", "HasCmpBccFusion", "true", + "CPU fuses cmp+bcc operations">; + def FeatureFuseAddress : SubtargetFeature< "fuse-address", "HasFuseAddress", "true", "CPU fuses address generation and memory operations">; @@ -615,6 +619,9 @@ def ProcA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", FeatureDotProd, FeatureFPARMv8, FeatureFullFP16, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseLiterals, FeatureNEON, FeatureRAS, FeatureRCPC, @@ -627,6 +634,7 @@ def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", FeatureCrypto, FeatureFPARMv8, FeatureFuseAES, + FeatureFuseLiterals, FeatureNEON, FeaturePerfMon ]>; @@ -658,6 +666,7 @@ def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", 
"Cortex-A76 ARM processors", [ HasV8_2aOps, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeatureRCPC, FeatureCrypto, @@ -669,7 +678,9 @@ def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", "Cortex-A77 ARM processors", [ HasV8_2aOps, + FeatureCmpBccFusion, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeatureRCPC, FeatureCrypto, FeatureFullFP16, @@ -680,6 +691,7 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", "CortexA78", "Cortex-A78 ARM processors", [ HasV8_2aOps, + FeatureCmpBccFusion, FeatureCrypto, FeatureFPARMv8, FeatureFuseAES, @@ -696,6 +708,7 @@ def ProcA78C : SubtargetFeature<"cortex-a78c", "ARMProcFamily", "CortexA78C", "Cortex-A78C ARM processors", [ HasV8_2aOps, + FeatureCmpBccFusion, FeatureCrypto, FeatureDotProd, FeatureFlagM, @@ -723,6 +736,7 @@ def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", "Cortex-X1 ARM processors", [ HasV8_2aOps, + FeatureCmpBccFusion, FeatureCrypto, FeatureFPARMv8, FeatureFuseAES, diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp index 9a2103579a6a..f3b8ef16d6f9 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -21,7 +21,7 @@ namespace { /// CMN, CMP, TST followed by Bcc static bool isArithmeticBccPair(const MachineInstr *FirstMI, - const MachineInstr &SecondMI) { + const MachineInstr &SecondMI, bool CmpOnly) { if (SecondMI.getOpcode() != AArch64::Bcc) return false; @@ -29,6 +29,13 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI, if (FirstMI == nullptr) return true; + // If we're in CmpOnly mode, we only fuse arithmetic instructions that + // discard their result. 
+ if (CmpOnly && !(FirstMI->getOperand(0).getReg() == AArch64::XZR || + FirstMI->getOperand(0).getReg() == AArch64::WZR)) { + return false; + } + switch (FirstMI->getOpcode()) { case AArch64::ADDSWri: case AArch64::ADDSWrr: @@ -380,8 +387,11 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, // All checking functions assume that the 1st instr is a wildcard if it is // unspecified. - if (ST.hasArithmeticBccFusion() && isArithmeticBccPair(FirstMI, SecondMI)) - return true; + if (ST.hasCmpBccFusion() || ST.hasArithmeticBccFusion()) { + bool CmpOnly = !ST.hasArithmeticBccFusion(); + if (isArithmeticBccPair(FirstMI, SecondMI, CmpOnly)) + return true; + } if (ST.hasArithmeticCbzFusion() && isArithmeticCbzPair(FirstMI, SecondMI)) return true; if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI)) diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index b34911eafdd2..8fe2f125982f 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -221,6 +221,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool UseAlternateSExtLoadCVTF32Pattern = false; bool HasArithmeticBccFusion = false; bool HasArithmeticCbzFusion = false; + bool HasCmpBccFusion = false; bool HasFuseAddress = false; bool HasFuseAES = false; bool HasFuseArithmeticLogic = false; @@ -377,6 +378,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { } bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; } bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } + bool hasCmpBccFusion() const { return HasCmpBccFusion; } bool hasFuseAddress() const { return HasFuseAddress; } bool hasFuseAES() const { return HasFuseAES; } bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; } diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll b/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll index 694c8a0d0765..489cc849b908 100644 --- 
a/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll @@ -1,4 +1,5 @@ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-address | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll index 95a419bd7398..ef01326f376c 100644 --- a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll @@ -2,8 +2,14 @@ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a76 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a77 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78c| FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-x1 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll index 
74180d92089b..be67e9f852e1 100644
--- a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
@@ -1,6 +1,8 @@
 ; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=-fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT
 ; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
 ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
 ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
 ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
 ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits