https://github.com/wxz2020 updated https://github.com/llvm/llvm-project/pull/91022
>From 8aebe46d7fdd15f02a9716718f53b03056ef0d19 Mon Sep 17 00:00:00 2001 From: Wei Zhao <wez...@qti.qualcomm.com> Date: Fri, 3 May 2024 22:01:58 +0000 Subject: [PATCH 1/2] [AArch64] Add support for Qualcomm Oryon processor --- clang/test/Driver/aarch64-oryon-1.c | 19 + clang/test/Misc/target-invalid-cpu-note.c | 4 +- .../llvm/TargetParser/AArch64TargetParser.h | 5 + llvm/lib/Target/AArch64/AArch64.td | 5 + llvm/lib/Target/AArch64/AArch64Processors.td | 30 + llvm/lib/Target/AArch64/AArch64SchedOryon.td | 1727 +++++++++++++++++ llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 7 + llvm/lib/TargetParser/Host.cpp | 1 + llvm/unittests/TargetParser/Host.cpp | 3 + .../TargetParser/TargetParserTest.cpp | 16 +- 10 files changed, 1813 insertions(+), 4 deletions(-) create mode 100644 clang/test/Driver/aarch64-oryon-1.c create mode 100644 llvm/lib/Target/AArch64/AArch64SchedOryon.td diff --git a/clang/test/Driver/aarch64-oryon-1.c b/clang/test/Driver/aarch64-oryon-1.c new file mode 100644 index 000000000000..952ba5df74ba --- /dev/null +++ b/clang/test/Driver/aarch64-oryon-1.c @@ -0,0 +1,19 @@ +// RUN: %clang -target aarch64 -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix %s +// RUN: %clang -target aarch64 -mlittle-endian -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix %s +// RUN: %clang -target aarch64_be -mlittle-endian -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix %s +// RUN: %clang -target aarch64 -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix-TUNE %s +// RUN: %clang -target aarch64 -mlittle-endian -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix-TUNE %s +// RUN: %clang -target aarch64_be -mlittle-endian -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=Phoenix-TUNE %s +// Phoenix: "-cc1"{{.*}} "-triple" "aarch64{{(--)?}}"{{.*}} "-target-cpu" "oryon-1" "-target-feature" "+v8.6a" +// Phoenix-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{(--)?}}"{{.*}} "-target-cpu" "generic" + +// RUN: %clang 
-target arm64 -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-Phoenix %s +// RUN: %clang -target arm64 -mlittle-endian -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-Phoenix %s +// RUN: %clang -target arm64 -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-Phoenix-TUNE %s +// RUN: %clang -target arm64 -mlittle-endian -mtune=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-Phoenix-TUNE %s +// ARM64-Phoenix: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "oryon-1" "-target-feature" "+v8.6a" +// ARM64-Phoenix-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic" + +// RUN: %clang -target aarch64 -mcpu=oryon-1 -mtune=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-Phoenix %s +// RUN: %clang -target aarch64 -mtune=cortex-a53 -mcpu=oryon-1 -### -c %s 2>&1 | FileCheck -check-prefix=MCPU-MTUNE-Phoenix %s +// MCPU-MTUNE-Phoenix: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "oryon-1" diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index 768b243b04e3..a71ebd6a023e 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -5,11 +5,11 @@ // RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64 // AARCH64: error: unknown target CPU 'not-a-cpu' -// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, 
apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, cobalt-100, grace{{$}} +// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}} // RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64 // TUNE_AARCH64: error: unknown target CPU 'not-a-cpu' -// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, 
neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, cobalt-100, grace{{$}} +// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}} // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 // X86: error: unknown target CPU 'not-a-cpu' diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 04fbaf07adfb..e2682bc4b331 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -786,6 +786,11 @@ inline constexpr CpuInfo CpuInfos[] = { AArch64::AEK_SHA2, AArch64::AEK_AES, AArch64::AEK_MTE, AArch64::AEK_SB, AArch64::AEK_SSBS, AArch64::AEK_CSSC})}, + 
{"oryon-1", ARMV8_6A, + (AArch64::ExtensionBitset({AArch64::AEK_AES, AArch64::AEK_CRYPTO, + AArch64::AEK_RAND, AArch64::AEK_SM4, + AArch64::AEK_SHA3, AArch64::AEK_SHA2, + AArch64::AEK_PROFILE}))}, }; // Name alias. diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 4b2ce0d73949..5708b6173750 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -85,6 +85,10 @@ def SMEUnsupported : AArch64Unsupported { SME2Unsupported.F); } +def MTEUnsupported : AArch64Unsupported { + let F = [HasMTE]; +} + let F = [HasPAuth, HasPAuthLR] in def PAUnsupported : AArch64Unsupported; @@ -109,6 +113,7 @@ include "AArch64SchedNeoverseN1.td" include "AArch64SchedNeoverseN2.td" include "AArch64SchedNeoverseV1.td" include "AArch64SchedNeoverseV2.td" +include "AArch64SchedOryon.td" include "AArch64Processors.td" diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index f2286ae17dba..eca9eb859448 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -616,6 +616,27 @@ def TuneAmpere1B : SubtargetFeature<"ampere1b", "ARMProcFamily", "Ampere1B", FeatureLdpAlignedOnly, FeatureStpAlignedOnly]>; +def TuneOryon : SubtargetFeature<"oryon-1", "ARMProcFamily", + "Oryon", + "Nuvia Inc Oryon processors", [ + FeatureCrypto, + FeatureFPARMv8, + FeatureNEON, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureEnableSelectOptimize, + FeatureFuseCryptoEOR, + FeatureFuseAddress, + FeatureSM4, + FeatureSHA2, + FeatureSHA3, + FeatureAES, + FeatureFullFP16, + FeatureFP16FML, + FeaturePerfMon, + FeatureSPE, + FeaturePostRAScheduler, + HasV8_6aOps]>; def ProcessorFeatures { list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, @@ -805,6 +826,11 @@ def ProcessorFeatures { FeatureSHA3, FeatureAES, FeatureCSSC, FeatureWFxT, FeatureFullFP16]; + list<SubtargetFeature> Oryon = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, + 
FeatureCrypto, FeatureRandGen, + FeaturePAuth, FeatureSM4, FeatureSHA2, + FeatureSHA3, FeatureAES]; + // ETE and TRBE are future architecture extensions. We temporarily enable them // by default for users targeting generic AArch64. The extensions do not // affect code generated by the compiler and can be used only by explicitly @@ -987,3 +1013,7 @@ def : ProcessorModel<"ampere1a", Ampere1Model, ProcessorFeatures.Ampere1A, def : ProcessorModel<"ampere1b", Ampere1BModel, ProcessorFeatures.Ampere1B, [TuneAmpere1B]>; + +// Qualcomm Oryon +def : ProcessorModel<"oryon-1", OryonModel, ProcessorFeatures.Oryon, + [TuneOryon]>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedOryon.td b/llvm/lib/Target/AArch64/AArch64SchedOryon.td new file mode 100644 index 000000000000..063cc8681e2b --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64SchedOryon.td @@ -0,0 +1,1727 @@ +//=- AArch64SchedOryon.td - Nuvia Inc Oryon CPU 001 ---*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the scheduling model for Nuvia Inc Oryon +// family of processors. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Pipeline Description. + +def OryonModel : SchedMachineModel { + let IssueWidth = 14; // 14 micro-ops dispatched at a time. IXU=6, LSU=4, VXU=4 + let MicroOpBufferSize = 376; // 192 (48x4) entries in micro-op re-order buffer in VXU. 
+ // 120 ((20+20)x3) entries in micro-op re-order buffer in IXU + // 64 (16+16)x2 re-order buffer in LSU + // total 376 + let LoadLatency = 4; // 4 cycle Load-to-use from L1D$ + // LSU=5 NEON load + let MispredictPenalty = 13; // 13 cycles for mispredicted branch. + // Determined via a mix of micro-arch details and experimentation. + let LoopMicroOpBufferSize = 0; // Do not have a LoopMicroOpBuffer + let PostRAScheduler = 1; // Using PostRA sched. + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + SMEUnsupported.F, + MTEUnsupported.F, + PAUnsupported.F, + [HasPAuth, HasCSSC]); + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +let SchedModel = OryonModel in { + +// Issue ports. +// IXU has 6 ports p0 ~ p5 +// LSU has 4 ports p6 ~ p9(ls0 ~ ls3), p10/p11(std0, std1) has to work with ls0~ls3 +// VXU has 4 ports p12 ~ p15 + +// cross IXU/LSU/VXU resource group for FMOV P41 of VXU +// I2V +def ORYONI4FP0 : ProcResource<1>; +def ORYONI5FP1 : ProcResource<1>; +// V2I +def ORYONFP0I4 : ProcResource<1>; +def ORYONFP1I5 : ProcResource<1>; + +// store 1 for normal store instructions +def ORYONST0 : ProcResource<1>; +// store 2 for normal store instructions +def ORYONST1 : ProcResource<1>; + +// Port 0: ALU/Indirect/Direct Branch. +def ORYONP0 : ProcResource<1>; + +// Port 1: ALU/Direct Branch. +def ORYONP1 : ProcResource<1>; + +// Port 2: ALU. +def ORYONP2 : ProcResource<1>; + +// Port 3: ALU. +def ORYONP3 : ProcResource<1>; + +// Port 4: ALU. NOTE(review): Super is assigned twice; the later `let` overrides the earlier one - confirm intent. +def ORYONP4 : ProcResource<1> { + let Super = ORYONI4FP0; + let Super = ORYONFP0I4; } + +// Port 5: ALU. NOTE(review): Super is assigned twice; the later `let` overrides the earlier one - confirm intent. +def ORYONP5 : ProcResource<1> { + let Super = ORYONI5FP1; + let Super = ORYONFP1I5; } + +// Port 6: Load/Store. LS0 +def ORYONP6 : ProcResource<1> { + let Super = ORYONST0; } + +// Port 7: Load/store. LS1 +def ORYONP7 : ProcResource<1> { + let Super = ORYONST0; } + +// Port 8: Load/Store. 
LS2 +def ORYONP8 : ProcResource<1> { + let Super = ORYONST1; } + +// Port 9: Load/store. LS3 +def ORYONP9 : ProcResource<1> { + let Super = ORYONST1; } + +// Port 10: Load/Store. STD0 +def ORYONP10SD0 : ProcResource<1> { + let Super = ORYONST0; } + +// Port 11: Load/store. STD1 +def ORYONP11SD1 : ProcResource<1> { + let Super = ORYONST1; } + +// Port 12: FP/Neon/SIMD/Crypto. +def ORYONP12FP0 : ProcResource<1> { + let Super = ORYONI4FP0; + let Super = ORYONFP0I4; } + +// Port 13: FP/Neon/SIMD/Crypto. +def ORYONP13FP1 : ProcResource<1> { + let Super = ORYONI5FP1; + let Super = ORYONFP1I5; } + +// Port 14: FP/Neon/SIMD/Crypto. +def ORYONP14FP2 : ProcResource<1>; + +// Port 15: FP/Neon/SIMD/Crypto. +def ORYONP15FP3 : ProcResource<1>; + +// Define groups for the functional units on each issue port. Each group +// created will be used by a WriteRes. + +// Integer add/shift/logical/misc. instructions on port I0/I1/I2/I3/I4/I5. +def ORYONI012345 : ProcResGroup<[ORYONP0, ORYONP1, ORYONP2, + ORYONP3, ORYONP4, ORYONP5]> { + let BufferSize = 120; +} + +// Direct Conditional Branch instructions on ports I0/I1. +def ORYONI01 : ProcResGroup<[ORYONP0, ORYONP1]> { + let BufferSize = 40; +} + +// Indirect/crypto Conditional Branch instructions on ports I0. +def ORYONI0 : ProcResGroup<[ORYONP0]> { + let BufferSize = 20; +} + +// Crypto/CRC/PAU instructions on ports I2. +def ORYONI2 : ProcResGroup<[ORYONP2]> { + let BufferSize = 20; +} + +// Multiply/Multiply-ADD instructions on ports I4/I5. +def ORYONI45 : ProcResGroup<[ORYONP4, ORYONP5]> { + let BufferSize = 40; +} + +// Divide instructions on ports I5. +def ORYONI5 : ProcResGroup<[ORYONP5]> { + let BufferSize = 20; +} + +// Comparison instructions on ports I0/I1/I2/I3. +def ORYONI0123 : ProcResGroup<[ORYONP0, ORYONP1, + ORYONP2, ORYONP3]> { + let BufferSize = 80; +} + +// Load instructions on ports P6/P7/P8/P9. 
+def ORYONLD : ProcResGroup<[ORYONP6, ORYONP7, ORYONP8, ORYONP9]> { + let BufferSize = 64; +} + +// Store instructions on combo of STA/STD pipes +def ORYONST : ProcResGroup<[ORYONST0, ORYONST1]> { + let BufferSize = 64; +} + +// Arithmetic and CRYP-AED ASIMD/FP instructions on ports FP0/FP1/FP2/FP3. +def ORYONFP0123 : ProcResGroup<[ORYONP12FP0, ORYONP13FP1, + ORYONP14FP2, ORYONP15FP3]> { + let BufferSize = 192; +} + +// FP Comparison and F/I move instructions on ports FP0/FP1. +def ORYONFP01 : ProcResGroup<[ORYONP12FP0, ORYONP13FP1]> { + let BufferSize = 96; +} + +// FDIV instructions on ports FP3. +def ORYONFP3 : ProcResGroup<[ORYONP15FP3]> { + let BufferSize = 48; +} + +// CRYP-SHA instructions on port FP1 (ORYONP13FP1); previously listed ORYONP14FP2, duplicating ORYONFP2. +def ORYONFP1 : ProcResGroup<[ORYONP13FP1]> { + let BufferSize = 48; +} + +def ORYONFP2 : ProcResGroup<[ORYONP14FP2]> { // Single-port group for FP2 (ORYONP14FP2). + let BufferSize = 48; +} + +// Reciprocal, Square root on FP0. +def ORYONFP0 : ProcResGroup<[ORYONP12FP0]> { + let BufferSize = 48; +} + +// cross IXU/LSU/VXU resource group for FMOV P41 of VXU +// I2V +def ORYONI2V : ProcResGroup<[ORYONI4FP0, ORYONI5FP1]> { + let BufferSize = 40; +} + +// V2I +def ORYONV2I : ProcResGroup<[ORYONFP0I4, ORYONFP1I5]> { + let BufferSize = 96; +} + +// Define commonly used write types for InstRW specializations. +// All definitions follow the format: ORYONWrite_<NumCycles>Cyc_<Resources>. + +// Because of the complexity of Oryon CPU, we skip the following +// generic definitions and define each instruction specifically + +// These WriteRes entries are not used in the Oryon sched model. 
+def : WriteRes<WriteImm, []> { let Unsupported = 1; } +def : WriteRes<WriteI, []> { let Unsupported = 1; } +def : WriteRes<WriteISReg, []> { let Unsupported = 1; } +def : WriteRes<WriteIEReg, []> { let Unsupported = 1; } +def : WriteRes<WriteExtr, []> { let Unsupported = 1; } +def : WriteRes<WriteIS, []> { let Unsupported = 1; } +def : WriteRes<WriteID32, []> { let Unsupported = 1; } +def : WriteRes<WriteID64, []> { let Unsupported = 1; } +def : WriteRes<WriteIM32, []> { let Unsupported = 1; } +def : WriteRes<WriteIM64, []> { let Unsupported = 1; } +def : WriteRes<WriteBr, []> { let Unsupported = 1; } +def : WriteRes<WriteBrReg, []> { let Unsupported = 1; } +def : WriteRes<WriteLD, []> { let Unsupported = 1; } +def : WriteRes<WriteST, []> { let Unsupported = 1; } +def : WriteRes<WriteSTP, []> { let Unsupported = 1; } +def : WriteRes<WriteAdr, []> { let Unsupported = 1; } +def : WriteRes<WriteLDIdx, []> { let Unsupported = 1; } +def : WriteRes<WriteSTIdx, []> { let Unsupported = 1; } +def : WriteRes<WriteF, []> { let Unsupported = 1; } +def : WriteRes<WriteFCmp, []> { let Unsupported = 1; } +def : WriteRes<WriteFCvt, []> { let Unsupported = 1; } +def : WriteRes<WriteFCopy, []> { let Unsupported = 1; } +def : WriteRes<WriteFImm, []> { let Unsupported = 1; } +def : WriteRes<WriteFMul, []> { let Unsupported = 1; } +def : WriteRes<WriteFDiv, []> { let Unsupported = 1; } +def : WriteRes<WriteVd, []> { let Unsupported = 1; } +def : WriteRes<WriteVq, []> { let Unsupported = 1; } +def : WriteRes<WriteVLD, []> { let Unsupported = 1; } +def : WriteRes<WriteVST, []> { let Unsupported = 1; } +def : WriteRes<WriteSys, []> { let Unsupported = 1; } +def : WriteRes<WriteBarrier, []> { let Unsupported = 1; } +def : WriteRes<WriteHint, []> { let Unsupported = 1; } +def : WriteRes<WriteLDHi, []> { let Unsupported = 1; } +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + +// These ReadAdvance entries will be defined in later implementation +def : ReadAdvance<ReadI, 0>; +def : 
ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 0>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; +def : ReadAdvance<ReadST, 0>; + + +//IXU resource definition +// 1 cycle NO pipe +def ORYONWrite_1Cyc_NONE : SchedWriteRes<[]>; + +// 1 cycle on I01. +def ORYONWrite_1Cyc_I01 : SchedWriteRes<[ORYONI01]>; + +def ORYONWrite_1Cyc_2Uops_I01 : SchedWriteRes<[ORYONI01]> { + let NumMicroOps = 2; +} + +def ORYONWrite_1Cyc_I0 : SchedWriteRes<[ORYONI0]>; + +// 7 cycles on I2. PAC*/AUT* instructions +def ORYONWrite_7Cyc_I2 : SchedWriteRes<[ORYONI2]> { + let Latency = 7; +} + +// 7 cycles on I2. PAC*/AUT* instructions +def ORYONWrite_7Cyc_3Uops_I2 : SchedWriteRes<[ORYONI2]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// 9 (7+1+1) cycles on I2 and I0/I1, I0. Authentication branch instructions +// these instructions are broken down to three uops +// a. PtrAuth on pipe 2 taking 7 cycles +// b. Link Register Update on pipes 0 and 1 taking 1 cycle +// c. Indirect branch on pipe 0 taking 1 cycle + +def ORYONWrite_9Cyc_I012 : SchedWriteRes<[ORYONI2, ORYONI01]> { + let Latency = 9; + let NumMicroOps = 3; +} + +// 3 cycles on I2. 
CRC32 and CRC32C instructions +def ORYONWrite_3Cyc_I2 : SchedWriteRes<[ORYONI2]> { + let Latency = 3; +} + +// 1 cycle on I012345 +def ORYONWrite_1Cyc_I012345 : SchedWriteRes<[ORYONI012345]>; + +// 1 cycle on I0123 +def ORYONWrite_1Cyc_I0123 : SchedWriteRes<[ORYONI0123]>; + +// 1 cycle on 2 of I012345 +def ORYONWrite_1Cyc_I012345_I012345 : +SchedWriteRes<[ORYONI012345, ORYONI012345]> ; + +// 2 cycle on 2 of I0123 with ReleaseAtCycles +def ORYONWrite_2Cyc_I0123_I0123_RC : +SchedWriteRes<[ORYONI0123, ORYONI0123]> { + let Latency = 2; + let ReleaseAtCycles = [2,2]; +} + +// 2 cycle on 2 of I012345 +def ORYONWrite_2Cyc_I012345_I012345_RC : +SchedWriteRes<[ORYONI012345, ORYONI012345]> { + let Latency = 2; + let ReleaseAtCycles = [2,2]; +} + +// 3 cycle on 2 of I45 +def ORYONWrite_3Cyc_I45_I45_RC : +SchedWriteRes<[ORYONI45, ORYONI45]> { + let Latency = 3; + let ReleaseAtCycles = [2,2]; +} + +// 3 cycle on I45 +def ORYONWrite_3Cyc_I45 : SchedWriteRes<[ORYONI45]> { + let Latency = 3; +} + +// 7 cycle on I2 32-bit integer division +def ORYONWrite_7Cyc_I2_RC : SchedWriteRes<[ORYONI2]> { + let Latency = 7; + let ReleaseAtCycles = [2]; +} + +// 9 cycle on I2 64-bit integer division +def ORYONWrite_9Cyc_I2_RC : SchedWriteRes<[ORYONI2]> { + let Latency = 9; + let ReleaseAtCycles = [2]; +} + +// LSU resource definition +// need to define WriteLDAdr, WriteAdrAdr, WriteLDHi, WriteSTX +// 4 cycle on LS(P6789) +def ORYONWrite_4Cyc_LD : SchedWriteRes<[ORYONLD]> { + let Latency = 4; +} + +// 4 cycle for Post/Pre inc/dec access, also covers all pair loads Post/Pre +def ORYONWrite_4Cyc_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { + let Latency = 4; +} + +// 5 (4+1) for VXU SIMD access/could also include FP +// resource might not be correct, as VXU resource not included +def ORYONWrite_5Cyc_LD : SchedWriteRes<[ORYONLD]> { + let Latency = 5; +} + +def ORYONWrite_5Cyc_2Uops_LD : SchedWriteRes<[ORYONLD]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def ORYONWrite_5Cyc_3Uops_LD 
: SchedWriteRes<[ORYONLD]> { + let Latency = 5; + let NumMicroOps = 3; +} + +def ORYONWrite_5Cyc_4Uops_LD : SchedWriteRes<[ORYONLD]> { + let Latency = 5; + let NumMicroOps = 4; +} + +def ORYONWrite_5Cyc_5Uops_LD : SchedWriteRes<[ORYONLD]> { + let Latency = 5; + let NumMicroOps = 5; +} + +def ORYONWrite_5Cyc_6Uops_LD : SchedWriteRes<[ORYONLD]> { + let Latency = 5; + let NumMicroOps = 6; +} + +def ORYONWrite_5Cyc_8Uops_LD : SchedWriteRes<[ORYONLD]> { + let Latency = 5; + let NumMicroOps = 8; +} + +def ORYONWrite_5Cyc_10Uops_LD : SchedWriteRes<[ORYONLD]> { + let Latency = 5; + let NumMicroOps = 10; +} + +// 5 cycle for Post/Pre inc/dec access +def ORYONWrite_5Cyc_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { + let Latency = 5; +} + +def ORYONWrite_5Cyc_2Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def ORYONWrite_5Cyc_3Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { + let Latency = 5; + let NumMicroOps = 3; +} + +def ORYONWrite_5Cyc_4Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { + let Latency = 5; + let NumMicroOps = 4; +} + +def ORYONWrite_5Cyc_5Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { + let Latency = 5; + let NumMicroOps = 5; +} + +def ORYONWrite_5Cyc_6Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { + let Latency = 5; + let NumMicroOps = 6; +} + +def ORYONWrite_5Cyc_8Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { + let Latency = 5; + let NumMicroOps = 8; +} + +def ORYONWrite_5Cyc_10Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { + let Latency = 5; + let NumMicroOps = 10; +} + +// 1 cycle for all generic stores +def ORYONWrite_1Cyc_ST : SchedWriteRes<[ORYONST]>; + +def ORYONWrite_1Cyc_2Uops_ST : SchedWriteRes<[ORYONST]> { + let NumMicroOps = 2; +} + +def ORYONWrite_1Cyc_3Uops_ST : SchedWriteRes<[ORYONST]> { + let NumMicroOps = 3; +} + +def ORYONWrite_1Cyc_4Uops_ST : SchedWriteRes<[ORYONST]> { + let NumMicroOps = 4; +} + +def 
ORYONWrite_1Cyc_5Uops_ST : SchedWriteRes<[ORYONST]> { + let NumMicroOps = 5; +} + +def ORYONWrite_1Cyc_6Uops_ST : SchedWriteRes<[ORYONST]> { + let NumMicroOps = 6; +} + +def ORYONWrite_1Cyc_8Uops_ST : SchedWriteRes<[ORYONST]> { + let NumMicroOps = 8; +} + +def ORYONWrite_1Cyc_10Uops_ST : SchedWriteRes<[ORYONST]> { + let NumMicroOps = 10; +} + +// 1 cycle for neon write: float + ASIMD with Post/Pre Inc/Dec access +// also includes Pair store until further informed +def ORYONWrite_1Cyc_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { + let NumMicroOps = 3; +} + +def ORYONWrite_1Cyc_2Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { + let NumMicroOps = 2; +} + +def ORYONWrite_1Cyc_3Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { + let NumMicroOps = 3; +} + +def ORYONWrite_1Cyc_4Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { + let NumMicroOps = 4; +} + +def ORYONWrite_1Cyc_5Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { + let NumMicroOps = 5; +} + +def ORYONWrite_1Cyc_6Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { + let NumMicroOps = 6; +} + +def ORYONWrite_1Cyc_8Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { + let NumMicroOps = 8; +} + +def ORYONWrite_1Cyc_10Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { + let NumMicroOps = 10; +} + +// VXU resource definition + +// I2V instruction has 1 uOp +// I2v with convert has 2 uOps +// all I2V, V2I's throughputs are 2 +// On VXU doc, p37 -- latencies and throughput +// P41, resource taken, P42, uOps +def ORYONWrite_I2V_4Cyc_I45 : SchedWriteRes<[ORYONI2V]> { + let Latency = 4; +} + +// inline a FCVT, so add one more uOp +def ORYONWrite_I2V_7Cyc_I45 : SchedWriteRes<[ORYONI2V]> { + let Latency = 7; + let NumMicroOps = 2; +} + +// V2I move instruction has 1/2 uOps, P42 in VXU doc +// Latency is 3, FCVT is also 3 cycle +// move + convert is 6 (3+3) cycles +// throughput is 2 +def ORYONWrite_V2I_3Cyc_FP01 : SchedWriteRes<[ORYONV2I]> { + let Latency 
= 3; +} + +// inline a FCVT, so add one more uOp +def ORYONWrite_V2I_6Cyc_FP01 : SchedWriteRes<[ORYONV2I]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def ORYONWrite_V2V_2Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { + let Latency = 2; +} + +def ORYONWrite_V2V_3Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { + let Latency = 3; +} + +def ORYONWrite_V2V_6Cyc_FP01 : SchedWriteRes<[ORYONFP0123]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def ORYONWrite_4Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { + let Latency = 4; +} + +def ORYONWrite_3Cyc_FP0 : SchedWriteRes<[ORYONFP0]> { + let Latency = 3; +} + +def ORYONWrite_3Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { + let Latency = 3; +} + +def ORYONWrite_3Cyc_2Uops_FP0123 : SchedWriteRes<[ORYONFP0123]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def ORYONWrite_2Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { + let Latency = 2; +} + +def ORYONWrite_2Cyc_FP01 : SchedWriteRes<[ORYONFP01]> { + let Latency = 2; +} + +// 2 cycle on FP1 +def ORYONWrite_2Cyc_FP1 : SchedWriteRes<[ORYONFP1]> { + let Latency = 2; +} + +// 3 cycle on FP1 +def ORYONWrite_3Cyc_FP1 : SchedWriteRes<[ORYONFP1]> { + let Latency = 3; +} + +// 4 cycle , 0.5 throughput on FP1 +def ORYONWrite_4Cyc_FP1_RC4 : SchedWriteRes<[ORYONFP1]> { + let Latency = 4; + let ReleaseAtCycles = [4]; +} + +// 5 cycle , 1 throughput on FP1 +def ORYONWrite_5Cyc_FP1 : SchedWriteRes<[ORYONFP1]> { + let Latency = 5; +} + +// 8 cycle , 2 throughput on FP0123 +def ORYONWrite_8Cyc_FP0123_RC : SchedWriteRes<[ORYONFP0123]> { + let Latency = 8; + let ReleaseAtCycles = [2]; +} + +def ORYONWrite_6Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { + let Latency = 6; +} + +def ORYONWrite_7Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { + let Latency = 7; +} + +def ORYONWrite_8Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { + let Latency = 8; +} + +def ORYONWrite_9Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { + let Latency = 9; +} + +def ORYONWrite_10Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { + let Latency = 10; +} + +def 
ORYONWrite_8Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> { + let Latency = 8; + let ReleaseAtCycles = [2]; +} + +def ORYONWrite_10Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> { + let Latency = 10; + let ReleaseAtCycles = [2]; +} + +def ORYONWrite_13Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> { + let Latency = 13; + let ReleaseAtCycles = [2]; +} + +def ORYONWrite_4Cyc_FP0123_RC : +SchedWriteRes<[ORYONFP0123]> { + let Latency = 4; + let ReleaseAtCycles = [2]; +} + +def ORYONWrite_4Cyc_FP0123_FP0123_RC : +SchedWriteRes<[ORYONFP0123, ORYONFP0123]> { + let Latency = 4; + let NumMicroOps = 2; + let ReleaseAtCycles = [2,2]; +} + +def ORYONWrite_4Cyc_FP0123_FP0123_FP0123_RC : +SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123]> { + let Latency = 4; + let NumMicroOps = 3; + let ReleaseAtCycles = [3,3,3]; +} + +def ORYONWrite_6Cyc_FP0123_FP0123_FP0123_FP0123_RC : +SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123, ORYONFP0123]> { + let Latency = 6; + let NumMicroOps = 4; + let ReleaseAtCycles = [6,6,6,6]; +} + +//===----------------------------------------------------------------------===// +// Instruction Tables in IXU +//===----------------------------------------------------------------------===// + +//--- +// Arithmetic Instructions +//--- + +// Table on P74 +//============= + +//1, 1, 6 +def : InstRW<[ORYONWrite_1Cyc_I012345], + (instregex "^ADD(W|X)r(i|r|x)", "^SUB(W|X)r(i|r|x)")>; + +//2,2,3 +def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC], + (instregex "^ADD(W|X)rs", "^SUB(W|X)rs")>; + +//1,1,4 alias CMP, CMN on page 75 +def : InstRW<[ORYONWrite_1Cyc_I0123], + (instregex "^ADDS(W|X)r(i|r|x)(64)?", "^SUBS(W|X)r(i|r|x)")>; + +//2,2,2 alias CMP, CMN on page 75 +def : InstRW<[ORYONWrite_2Cyc_I0123_I0123_RC], + (instregex "^ADDS(W|X)rs", "^SUBS(W|X)rs")>; + +//1,1,4 +def : InstRW<[ORYONWrite_1Cyc_I0123], + (instregex "^ADC(W|X)r","^SBC(W|X)r", + "^ADCS(W|X)r","^SBCS(W|X)r")>; + +//1,1,2 +def : InstRW<[ORYONWrite_1Cyc_2Uops_I01], + (instrs ADR,ADRP)>; + +//1,1,4 +def : 
InstRW<[ORYONWrite_1Cyc_I0123], + (instregex "^CSEL(W|X)r", "^CSINV(W|X)r", + "^CSNEG(W|X)r", "^CSINC(W|X)r")>; + +//--- +//Compare Instructions +//--- + +// Table on P75 +//============= + +// We have CCMP, CCMN as LLVM DAG node +// CMP is an alias of SUBS as above +// CMN is an alias of ADDS as above +// We also have no way to get shift compare node in LLVM +//2,2,1.5 CMP, CMN + +//1,1,4 +def : InstRW<[ORYONWrite_1Cyc_I0123], + (instregex "^CCMP(W|X)(i|r)", "^CCMN(W|X)(i|r)")>; + +//--- +// Branch +//--- + +// Table on P76 +//============= +def : InstRW<[ORYONWrite_1Cyc_NONE], (instrs B)>; +def : InstRW<[ORYONWrite_1Cyc_I01], (instrs BL)>; +def : InstRW<[ORYONWrite_1Cyc_I01], + (instrs Bcc, CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>; +def : InstRW<[ORYONWrite_1Cyc_I0], (instrs BR, BLR)>; +def : InstRW<[ORYONWrite_1Cyc_I0], (instrs RET)>; + +// Table on p47/p76 +// 3 uOp, 1 cycle for branch, 7 cycle for Authentication, +// 1 cycle for updating link register +// V8.3a PAC +def : InstRW<[ORYONWrite_9Cyc_I012], + (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, + BRAA, BRAAZ, BRAB, BRABZ)>; +def : InstRW<[ORYONWrite_9Cyc_I012], (instrs RETAA, RETAB, ERETAA, ERETAB)>; + +// we temporarily put it here. It needs fix. +// should LSU get involved? Need check? 
+// Should be a combined additive value of load and pauth +def : InstRW<[ORYONWrite_7Cyc_3Uops_I2], (instregex "^LDRAA", "^LDRAB")>; + +// Logical Instructions +//--- + +// Table on P77 +//============= + +//1,1,4 TST is an alias of ANDS +def : InstRW<[ORYONWrite_1Cyc_I0123], + (instregex "^ANDS(W|X)r(i|r|x)", "^BICS(W|X)r(i|r|x)")>; + +//2,2,2 TST shift is an alias +def : InstRW<[ORYONWrite_2Cyc_I0123_I0123_RC], + (instregex "^ANDS(W|X)rs", "^BICS(W|X)rs")>; + +//1,1,6 +def : InstRW<[ORYONWrite_1Cyc_I012345], + (instregex "^AND(W|X)r(i|r|x)", "^EOR(W|X)r(i|r|x)", + "^ORR(W|X)r(i|r|x)", "^BIC(W|X)r(i|r|x)", + "^EON(W|X)r(i|r|x)", "^ORN(W|X)r(i|r|x)")>; + +//2,2,3 +def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC], + (instregex "^AND(W|X)rs", "^EOR(W|X)rs", "^ORR(W|X)rs", + "^BIC(W|X)rs", "^EON(W|X)rs", "^ORN(W|X)rs")>; + + +//--- +// Shift Instructions +//--- + +// Table on P78 +//============= + +//1,1,6 +def : InstRW<[ORYONWrite_1Cyc_I012345], + (instregex "^ASRV(W|X)r", "^LSLV(W|X)r", + "^LSRV(W|X)r", "^RORV(W|X)r", + "RMIF")>; + +//--- +// Move-Data Bit-field and Sign_Extension Instructions +//--- + +// Table on P78 +//============= + +//1,1,6 +def : InstRW<[ORYONWrite_1Cyc_I012345], + (instregex "^MOVK(W|X)i", "^MOVN(W|X)i", + "^MOVZ(W|X)i", "^SBFM(W|X)ri", + "^UBFM(W|X)ri", "^BFM(W|X)ri", + "^SXT(W|B|H|X)", "^UXT(H|B)")>; + +// COPY instruction is an LLVM internal DAG node, needs further study +def : InstRW<[ORYONWrite_1Cyc_I012345], (instrs COPY)>; + +//--- +// Reverse Instructions +//--- + +// Table on P79 +//============= + + +//1,1,6 +def : InstRW<[ORYONWrite_1Cyc_I012345], + (instregex "^RBIT(W|X)r", "^REV(16|32|64)?(W|X)r")>; + + +//--- +// Flag Manipulate Instructions +//--- + +// Table on P79 +//============= + +//1,1,4 +def : InstRW<[ORYONWrite_1Cyc_I0123], + (instregex "^SETF8", "^SETF16", "^CFINV")>; + +//--- +// Miscellaneous Instructions +//--- + +// Table on P80 +//============= + +//1,1,6 +def : InstRW<[ORYONWrite_1Cyc_I012345], + (instregex 
"^CLS(W|X)r$", "^CLZ(W|X)r$", "^EXTR(W|X)rri")>; + + +//--- +// Multiply Instructions +//--- + +// Table on P81 +//============= + + +//1,3,2 +def : InstRW<[ORYONWrite_3Cyc_I45], + (instregex "^MADD(W|X)rrr", "^MSUB(W|X)rrr", + "^(S|U)MADDLrrr", "^(S|U)MSUBLrrr", + "^(S|U)MULHrr")>; + +//--- +// Divide Instructions +//--- + +// Table on P81 +//============= + +def : InstRW<[ORYONWrite_7Cyc_I2_RC], + (instregex "^(S|U)DIVWr")>; + +def : InstRW<[ORYONWrite_9Cyc_I2_RC], + (instregex "^(S|U)DIVXr")>; + + +//--- +// Cryptography Instructions +// +//1,3,1 on I2 +def : InstRW<[ORYONWrite_3Cyc_I2], + (instregex "^CRC32(B|H|W|X)rr", "^CRC32C(B|H|W|X)rr")>; + +//--- +// PAU instructions +//--- + +// on p47 of IXU document, we have 7 cycles for all PAU instructions +// here we just assume all signing and pauth instructions are 7 cycles +// assume all are 7 cycles here + +// signing instructions +def : InstRW<[ORYONWrite_7Cyc_I2], (instrs PACIA, PACIB, + PACDA, PACDB, + PACIZA, PACIZB, + PACDZA, PACDZB, + PACGA)>; +// authentication instructions +def : InstRW<[ORYONWrite_7Cyc_I2], (instrs AUTIA, AUTIB, + AUTDA, AUTDB, + AUTIZA, AUTIZB, + AUTDZA, AUTDZB)>; +def : InstRW<[ORYONWrite_7Cyc_I2], (instrs XPACI, XPACD)>; + +//===----------------------------------------------------------------------===// +// Instruction Tables in LSU +//===----------------------------------------------------------------------===// + +// 4 cycle Load-to-use from L1D$ +// Neon load with 5 cycle +// 6 cycle to STA ? +// STD cycle ? +// NEON STD + 2 + +// Load Instructions +// FP Load Instructions + +// Load pair, immed pre-index, normal +// Load pair, immed pre-index, signed words +// Load pair, immed post-index, normal +// Load pair, immed post-index, signed words +// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. 
+ +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPDi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPQi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPSi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPWi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDNPXi)>; + +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPDi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPQi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPSi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPSWi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPWi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDPXi)>; + +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBui)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDui)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHui)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQui)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSui)>; + +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDl)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQl)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWl)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXl)>; + +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRBi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRHi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRWi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRXi)>; + +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSBWi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSBXi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSHWi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSHXi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDTRSWi)>; + +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], + (instrs LDPDpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], + (instrs LDPQpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], + (instrs LDPSpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], + (instrs LDPWpre)>; + +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRDpre)>; +def : 
InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRQpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRWpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRXpre)>; + +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBWpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBXpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBWpost)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSBXpost)>; + +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHWpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHXpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHWpost)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSHXpost)>; + +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBBpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBBpost)>; + +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHHpre)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHHpost)>; + +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], + (instrs LDPDpost)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], + (instrs LDPQpost)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], + (instrs LDPSpost)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], + (instrs LDPWpost)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], + (instrs LDPXpost)>; + +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRBpost)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRDpost)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRHpost)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRQpost)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRSpost)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRWpost)>; +def : InstRW<[ORYONWrite_4Cyc_LD_I012345], (instrs LDRXpost)>; + +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBroW)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDroW)>; +def : 
InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHroW)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHHroW)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQroW)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSroW)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHWroW)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHXroW)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWroW)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXroW)>; + +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBroX)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDroX)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHHroX)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHroX)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQroX)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSroX)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHWroX)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHXroX)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWroX)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXroX)>; + +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURBi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURBBi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURDi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURHi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURHHi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURQi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURXi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSBWi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSBXi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSHWi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSHXi)>; +def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSWi)>; + + + +// Store register, immed post-index +// NOTE: Handled by WriteST, ReadAdrBase + +// Store register, immed pre-index +// NOTE: Handled by WriteST + +// Store pair, immed post-index, W-form +// Store pair, immed post-indx, X-form +// Store pair, immed pre-index, W-form +// Store 
pair, immed pre-index, X-form +// NOTE: Handled by WriteSTP. + +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURBi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURBBi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURDi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURHi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURHHi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURQi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURSi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURWi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURXi)>; + +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRBi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRHi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRWi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRXi)>; + +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPDi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPQi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPXi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPWi)>; + +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPDi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPQi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPXi)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPWi)>; + +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRBui)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRDui)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRHui)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRQui)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRXui)>; +def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRWui)>; + +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STPDpre, STPDpost)>; +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STPSpre, STPSpost)>; +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STPWpre, STPWpost)>; +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STPXpre, STPXpost)>; + +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STRBpre, STRBpost)>; +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STRBBpre, STRBBpost)>; +def : 
InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STRDpre, STRDpost)>; +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STRHpre, STRHpost)>; +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STRQpre, STRQpost)>; +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STRSpre, STRSpost)>; +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STRWpre, STRWpost)>; +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instrs STRXpre, STRXpost)>; + +def : InstRW<[ORYONWrite_1Cyc_ST], + (instrs STRBroW, STRBroX)>; +def : InstRW<[ORYONWrite_1Cyc_ST], + (instrs STRDroW, STRDroX)>; +def : InstRW<[ORYONWrite_1Cyc_ST], + (instrs STRHroW, STRHroX)>; +def : InstRW<[ORYONWrite_1Cyc_ST], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[ORYONWrite_1Cyc_ST], + (instrs STRQroW, STRQroX)>; +def : InstRW<[ORYONWrite_1Cyc_ST], + (instrs STRSroW, STRSroX)>; +def : InstRW<[ORYONWrite_1Cyc_ST], + (instrs STRWroW, STRWroX)>; +def : InstRW<[ORYONWrite_1Cyc_ST], + (instrs STRXroW, STRXroX)>; + +// ASIMD Load instructions, 4 cycle access + 2 cycle NEON access +// ASIMD load, 1 element, multiple, 1 reg, D-form 1uOps +// ASIMD load, 1 element, multiple, 1 reg, Q-form 1uOps +def : InstRW<[ORYONWrite_5Cyc_LD], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; + +def : InstRW<[ORYONWrite_5Cyc_LD_I012345], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 2 reg, D-form 3 uOps +// ASIMD load, 1 element, multiple, 2 reg, Q-form 2 uOps +def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], + (instregex "^LD1Twov(8b|4h|2s|1d)$")>; + +def : InstRW<[ORYONWrite_5Cyc_2Uops_LD], + (instregex "^LD1Twov(16b|8h|4s|2d)$")>; + +def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], + (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345], + (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 3 reg, D-form 4 uOps +// ASIMD load, 1 element, 
multiple, 3 reg, Q-form 3 uOps +def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], + (instregex "^LD1Threev(8b|4h|2s|1d)$")>; + +def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], + (instregex "^LD1Threev(16b|8h|4s|2d)$")>; + +def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], + (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], + (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 4 reg, D-form 6 uOps +// ASIMD load, 1 element, multiple, 4 reg, Q-form 4 uOps +def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], + (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], + (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; + +def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], + (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], + (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, one lane, B/H/S 2uOps +// ASIMD load, 1 element, one lane, D 2UOps +def : InstRW<[ORYONWrite_5Cyc_2Uops_LD], (instregex "^LD1i(8|16|32|64)$")>; +def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345], + (instregex "^LD1i(8|16|32|64)_POST$")>; + +// ASIMD load, 1 element, all lanes, D-form, B/H/S 2uOps +// ASIMD load, 1 element, all lanes, D-form, D 2uOps +// ASIMD load, 1 element, all lanes, Q-form 2uOps +def : InstRW<[ORYONWrite_5Cyc_2Uops_LD], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, multiple, D-form, B/H/S 3 uOps +// ASIMD load, 2 element, multiple, Q-form, D 4 uOps +def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], + (instregex "^LD2Twov(8b|4h|2s)$")>; +def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], + (instregex "^LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], + (instregex "^LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], + (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; + +// ASIMD 
load, 2 element, one lane, B/H 3 uOps +// ASIMD load, 2 element, one lane, S 3 uOps +// ASIMD load, 2 element, one lane, D 3 uOps +def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], (instregex "^LD2i(8|16|32|64)$")>; +def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], + (instregex "^LD2i(8|16|32|64)_POST$")>; + +// ASIMD load, 2 element, all lanes, D-form, B/H/S 3 uOps +// ASIMD load, 2 element, all lanes, D-form, D 3 uOps +// ASIMD load, 2 element, all lanes, Q-form 3 uOps +def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, multiple, D-form, B/H/S 5 uOps +// ASIMD load, 3 element, multiple, Q-form, B/H/S 6 uOps +// ASIMD load, 3 element, multiple, Q-form, D 6 uOps +def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], + (instregex "^LD3Threev(8b|4h|2s)$")>; +def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], + (instregex "^LD3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], + (instregex "^LD3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], + (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, one lane, B/H 4 uOps +// ASIMD load, 3 element, one lane, S 4 uOps +// ASIMD load, 3 element, one lane, D 5 uOps +def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], (instregex "^LD3i(8|16|32)$")>; +def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], (instregex "^LD3i(64)$")>; +def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], + (instregex "^LD3i(8|16|32)_POST$")>; +def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], + (instregex "^LD3i(64)_POST$")>; + +// ASIMD load, 3 element, all lanes, D-form, B/H/S 4 uOps +// ASIMD load, 3 element, all lanes, D-form, D 5 uOps +// ASIMD load, 3 element, all lanes, Q-form, B/H/S 4 uOps +// ASIMD load, 3 element, all lanes, Q-form, D 5 uOps +def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], + (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s)$")>; +def : 
InstRW<[ORYONWrite_5Cyc_5Uops_LD], + (instregex "^LD3Rv(1d|2d)$")>; +def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], + (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s)_POST$")>; +def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], + (instregex "^LD3Rv(1d|2d)_POST$")>; + +// ASIMD load, 4 element, multiple, D-form, B/H/S 6 uOps +// ASIMD load, 4 element, multiple, Q-form, B/H/S 10 uOps +// ASIMD load, 4 element, multiple, Q-form, D 8 uOps +def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], + (instregex "^LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[ORYONWrite_5Cyc_10Uops_LD], + (instregex "^LD4Fourv(16b|8h|4s)$")>; +def : InstRW<[ORYONWrite_5Cyc_8Uops_LD], + (instregex "^LD4Fourv(2d)$")>; +def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], + (instregex "^LD4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[ORYONWrite_5Cyc_10Uops_LD_I012345], + (instregex "^LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[ORYONWrite_5Cyc_8Uops_LD_I012345], + (instregex "^LD4Fourv(2d)_POST$")>; + +// ASIMD load, 4 element, one lane, B/H 5 uOps +// ASIMD load, 4 element, one lane, S 5 uOps +// ASIMD load, 4 element, one lane, D 6 uOps +def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], (instregex "^LD4i(8|16|32)$")>; +def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], (instregex "^LD4i(64)$")>; +def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], + (instregex "^LD4i(8|16|32)_POST$")>; +def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], + (instregex "^LD4i(64)_POST$")>; + +// ASIMD load, 4 element, all lanes, D-form, B/H/S 5 uOps +// ASIMD load, 4 element, all lanes, D-form, D 6 uOps +// ASIMD load, 4 element, all lanes, Q-form, B/H/S 5 uOps +// ASIMD load, 4 element, all lanes, Q-form, D 6 uOps +def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], + (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s)$")>; +def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], + (instregex "^LD4Rv(1d|2d)$")>; +def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], + (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s)_POST$")>; +def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], + (instregex "^LD4Rv(1d|2d)_POST$")>; + +// ASIMD 
Store Instructions +// ASIMD store, 1 element, multiple, 1 reg, D-form 1 uOps +// ASIMD store, 1 element, multiple, 1 reg, Q-form 1 uops +def : InstRW<[ORYONWrite_1Cyc_ST], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ORYONWrite_1Cyc_ST_I012345], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 2 reg, D-form 2 uOps +// ASIMD store, 1 element, multiple, 2 reg, Q-form 2 uOps +def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 3 reg, D-form 3 uOps +// ASIMD store, 1 element, multiple, 3 reg, Q-form 3 uOps +def : InstRW<[ORYONWrite_1Cyc_3Uops_ST], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ORYONWrite_1Cyc_3Uops_ST_I012345], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 4 reg, D-form 4 uOps +// ASIMD store, 1 element, multiple, 4 reg, Q-form 4 uOps +def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, one lane, B/H/S 2 uOps +// ASIMD store, 1 element, one lane, D 2 uOps +def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], + (instregex "^ST1i(8|16|32|64)$")>; +def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], + (instregex "^ST1i(8|16|32|64)_POST$")>; + +// ASIMD store, 2 element, multiple, D-form, B/H/S 2 uOps +// ASIMD store, 2 element, multiple, Q-form, B/H/S 4 uOps +// ASIMD store, 2 element, multiple, Q-form, D 4 uOps +def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], + (instregex "^ST2Twov(8b|4h|2s)$")>; +def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], + (instregex "^ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], + (instregex 
"^ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], + (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 2 element, one lane, B/H/S 2 uOps +// ASIMD store, 2 element, one lane, D 2 uOps +def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], + (instregex "^ST2i(8|16|32|64)$")>; +def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], + (instregex "^ST2i(8|16|32|64)_POST$")>; + +// ASIMD store, 3 element, multiple, D-form, B/H/S 4 uOps +// ASIMD store, 3 element, multiple, Q-form, B/H/S 6 uOps +// ASIMD store, 3 element, multiple, Q-form, D 6 uOps +def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], + (instregex "^ST3Threev(8b|4h|2s)$")>; +def : InstRW<[ORYONWrite_1Cyc_6Uops_ST], + (instregex "^ST3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], + (instregex "^ST3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[ORYONWrite_1Cyc_6Uops_ST_I012345], + (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 3 element, one lane, B/H 2 uOps +// ASIMD store, 3 element, one lane, S 2 uOps +// ASIMD store, 3 element, one lane, D 4 uOps +def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], (instregex "^ST3i(8|16|32)$")>; +def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], (instregex "^ST3i(64)$")>; +def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], + (instregex "^ST3i(8|16|32)_POST$")>; +def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], + (instregex "^ST3i(64)_POST$")>; + + +// ASIMD store, 4 element, multiple, D-form, B/H/S 5 uOps +// ASIMD store, 4 element, multiple, Q-form, B/H/S 10 uOps +// ASIMD store, 4 element, multiple, Q-form, D 8 uOps +def : InstRW<[ORYONWrite_1Cyc_5Uops_ST], + (instregex "^ST4Fourv(8b|4h|2s)$")>; +def : InstRW<[ORYONWrite_1Cyc_10Uops_ST], + (instregex "^ST4Fourv(16b|8h|4s)$")>; +def : InstRW<[ORYONWrite_1Cyc_8Uops_ST], + (instregex "^ST4Fourv(2d)$")>; +def : InstRW<[ORYONWrite_1Cyc_5Uops_ST_I012345], + (instregex "^ST4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[ORYONWrite_1Cyc_10Uops_ST_I012345], + (instregex 
"^ST4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[ORYONWrite_1Cyc_8Uops_ST_I012345], + (instregex "^ST4Fourv(2d)_POST$")>; + +// ASIMD store, 4 element, one lane, B/H 3 uOps +// ASIMD store, 4 element, one lane, S 3 uOps +// ASIMD store, 4 element, one lane, D 4 uOps +def : InstRW<[ORYONWrite_1Cyc_3Uops_ST], (instregex "^ST4i(8|16|32)$")>; +def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], (instregex "^ST4i(64)$")>; +def : InstRW<[ORYONWrite_1Cyc_3Uops_ST_I012345], + (instregex "^ST4i(8|16|32)_POST$")>; +def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], + (instregex "^ST4i(64)_POST$")>; + + +//===----------------------------------------------------------------------===// +// Instruction Tables in VXU +//===----------------------------------------------------------------------===// +// all uOps are not clearly written in the VXU document + +// I2V +def : InstRW<[ORYONWrite_I2V_4Cyc_I45], (instregex "^FMOV[HSD][WX]r", "^FMOVDXHighr")>; + +// I2V with convert +def : InstRW<[ORYONWrite_I2V_7Cyc_I45], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>; + +// V2I +def : InstRW<[ORYONWrite_V2I_3Cyc_FP01], (instregex "^FMOV[WX][HSD]r", "FMOVXDHighr")>; + +// V2I with convert 2nd [SU] necessary? 
+def : InstRW<[ORYONWrite_V2I_6Cyc_FP01], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>; + +// float to float move immediate, row 7 in big chart +def : InstRW<[ORYONWrite_V2V_2Cyc_FP0123], (instregex "^FMOV[HSD]r")>; +def : InstRW<[ORYONWrite_V2V_2Cyc_FP0123], (instregex "^FMOV[HSD]i")>; + +// float to float conversion within VXU, precision conversion +def : InstRW<[ORYONWrite_V2V_6Cyc_FP01], (instregex "^FJCVTZS")>; +def : InstRW<[ORYONWrite_V2V_3Cyc_FP0123], (instregex "^FCVT[HSD][HSD]r", + "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; + +// floating comparison +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMAX(NM)?")>; +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMIN(NM)?")>; + +// floating comparison write to NZCV +def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCMP(E)?[HSD]r[ir]")>; +def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCCMP(E)?[HSD]rr")>; + +// floating point conditional select +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FCSEL")>; + +// floating multiply-add +def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^(F|FN)MADD", "^(F|FN)MSUB", + "^(F|FN)MUL")>; + +// floating unary, cycle/throughput? xls row14 +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^F(ABS|NEG)[SD]r")>; + +//floating division/square root +def : InstRW<[ORYONWrite_7Cyc_FP3], (instregex "^FDIVHrr")>; +def : InstRW<[ORYONWrite_8Cyc_FP3], (instregex "^FDIVSrr")>; +def : InstRW<[ORYONWrite_10Cyc_FP3], (instregex "^FDIVDrr")>; + +def : InstRW<[ORYONWrite_8Cyc_FP3_RC], (instregex "^FSQRTHr")>; +def : InstRW<[ORYONWrite_10Cyc_FP3_RC], (instregex "^FSQRTSr")>; +def : InstRW<[ORYONWrite_13Cyc_FP3_RC], (instregex "^FSQRTDr")>; + +//========== +// SIMD move instructions +//========== + +// ASIMD DUP element +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^DUPv.+lane")>; +// ASIMD DUP general throughput undecided, 3? 
FP0123 +// VXU doc, p42, 2 uOps +def : InstRW<[ORYONWrite_3Cyc_2Uops_FP0123], (instregex "^DUPv.+gpr")>; + +// ASIMD insert, element to element +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^INSv.+lane")>; +// ASIMD insert, gen reg 3? FP0123? +def : InstRW<[ORYONWrite_3Cyc_2Uops_FP0123], (instregex "^INSv.+gpr")>; + +// ASIMD move, FP immed +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMOVv")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^[SU]MOVv")>; + +//========== +// SIMD arithmetic instructions +//========== +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDv", "^SUBv", + "^BIFv", "^BITv", "^BSLv", + "^ANDv", "^BICv", "^EORv", + "^ORRv", "^ORNv")>; + + +def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^FABDv", "^FADDv", "^FSUBv")>; + +// floating division +def : InstRW<[ORYONWrite_6Cyc_FP3], (instregex "^FDIVv.*16$")>; +def : InstRW<[ORYONWrite_7Cyc_FP3], (instregex "^FDIVv.*32$")>; +def : InstRW<[ORYONWrite_9Cyc_FP3], (instregex "^FDIVv.*64$")>; + +// FMAX, FMIN +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMAXv", "^FMAXNMv", + "^FMINv", "^FMINNMv")>; + +// floating multiply-add this is 4/3 need to fine tune +def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMLA(L|L2)?v", + "^FMLS(L|L2)?v")>; + +def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMUL(X)?v", + "^FRECPSv", "^FRSQRTSv")>; + +def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^MLAv","^MLSv", "^MULv", + "^PMULv", "UABAv")>; + +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "SABAv", "SABDv", + "^EXTv", "^TRN(1|2)v", + "^(SH|UH)(ADD|SUB)v", + "^S(MAX|MIN)v", + "^(SQ|UQ)(ADD|SUB)v", + // no such instruction "^SQ(R)?((MULH|MLAH|MLSH)v", + "^(SQ|SQR|UQ|UQR)SHLv", + "^(SR|UR)HADDv", + "^(S|SR|U|UR)SHLv", + "^UABDv", + "^U(MAX|MIN)v")>; +// IMAX or UMAX in the above line +//========== +// SIMD compare instructions +//========== + +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^CMEQv","^CMGEv","^CMGTv", + 
"^CMLEv","^CMLTv", "^CMHIv", + "^CMHSv", + "^FCMEQv", "^FCMGEv", + "^FCMGTv", "^FCMLEv", + "^FCMLTv", + "^FACGEv", "^FACGTv")>; + +//========== +// SIMD widening and narrowing arithmetic instructions +//========== +// NO need to list ADDHN2, RADDHN2, RSUBHN2 as they are not distinguished +// from ADDHN, RADDHN, RSUBHN in td file(v16i8, v8i16, v4i32). +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDHNv", + "^SUBHNv", + "^RADDHNv", + "^RSUBHNv", + "^SABD(L|L2)v", "^UABD(L|L2)v", + "^(S|U)(ADD|SUB)(L|L2|W|W2)v")>; + +def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^PMUL(L|L2)v","^SABA(L|L2)v", + "^(S|U|SQ)(MLA|MSL|MUL)(L|L2)v")>; + +//========== +// SIMD unary arithmetic instructions +//========== +//^MVNv is an alias of ^NOTv +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ABSv", "^CLSv","^CLZv", "^CNTv", + "^NEGv", "^NOTv", + "^RBITv", "^REV(16|32|64)v", + "^SQ(ABS|NEG)v", "^SQ(XT|XTU)(N|N2)v", + "^(SU|US)QADDv", "^(S|U)SHL(L|L2)v", + "^UQXT(N|N2)v", "^XTN2?v")>; + +def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^FCVT(L|L2|N|N2|XN|XN2)v", + "^FRINT[AIMNPXZ]v", + "^FRSQRTEv", + "^(S|U)ADALPv", + "^(S|U)ADDLPv")>; + + +def : InstRW<[ORYONWrite_3Cyc_FP0], (instregex "^URECPEv", "^URSQRTEv", + "^FRECPEv", "^FRECPXv")>; + +def : InstRW<[ORYONWrite_8Cyc_FP3_RC], (instregex "^FSQRTv.*16$")>; +def : InstRW<[ORYONWrite_10Cyc_FP3_RC], (instregex "^FSQRTv.*32$")>; +def : InstRW<[ORYONWrite_13Cyc_FP3_RC], (instregex "^FSQRTv.*64$")>; + +//========== +// SIMD binary elememt arithmetic instructions +//========== + +def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMLAv", "^FMLSv","^FMULX?v", + "^F(MLA|MLS)(L|L2)v")>; + + +def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^MLAv", "^MLSv", "^MULv", + "^(S|U|SQD)(MLA|MLS|MUL)(L|L2)v", + "^SQDMULHv", + "^SQRD(MLA|MLS|MUL)Hv")>; + +//========== +// SIMD permute instructions +//========== + +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^EXTv", "^TRN(1|2)v", + "^UZP(1|2)v", "^ZIP(1|2)v")>; + +//========== 
+// SIMD immediate instructions +//========== + +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMOVv", "^MOVIv", + "^MVNIv")>; + +//========== +// SIMD shift(immediate) instructions +//========== +// "^(S|U)XT(L|L2)v" counted as unary instruction as SHL +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^RSHR(N|N2)v", "^SHLv", + "^(SHL|SHR)(N|N2)v", + "^SLIv", + "^(SQ|SQR)SHR(U)?(N|N2)v", + "^(UQ|UQR)SHR(N|N2)v", + "^SQSHL(U)?v", + "^UQSHLv", + "^SRIv", + "^(S|SR|U|UR)SHRv", + "^(S|SR|U|UR)SRAv", + "^(S|U)SHL(L|L2)v")>; + +//========== +// SIMD floating-point and integer conversion instructions +//========== +// same as above conversion + +//========== +// SIMD reduce (across vector lanes) instructions +//========== + +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDVv", + "^(FMAX|FMIN)(V|NMV)v", + "^(S|U)ADDLVv", + "^(S|U)(MAX|MIN)Vv")>; +//========== +// SIMD pairwise arithmetic instructions +//========== + +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDPv", "^FADDPv", + "^(FMAX|FMIN)(NMP|P)v", + "^(S|U)(MIN|MAX)Pv")>; +//========== +// SIMD dot product instructions +//========== + +def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^(U|S)DOTv")>; + +//========== +// SIMD table lookup instructions +//========== +// TBL 1-reg/2-reg; TBX 1-reg, 1uOp, throughput=4 latency=2 +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instrs TBLv8i8One, TBLv16i8One, + TBXv8i8One, TBXv16i8One, + TBLv8i8Two, TBLv16i8Two)>; + +// TBL 3-reg/4-reg, 3uops, throughput=4/3=1.33 latency=4 +def : InstRW<[ORYONWrite_4Cyc_FP0123_FP0123_FP0123_RC], + (instrs TBLv8i8Three, TBLv16i8Three, + TBLv8i8Four, TBLv16i8Four)>; + + +// TBX 2-reg 2 uOps, throughput=2 latency=4 +def : InstRW<[ORYONWrite_4Cyc_FP0123_FP0123_RC], (instrs TBXv8i8Two, TBXv16i8Two)>; + +// TBX 3-reg/4-reg, 4uOps, throughput=1, latency=6 +def : InstRW<[ORYONWrite_6Cyc_FP0123_FP0123_FP0123_FP0123_RC], + (instrs TBXv8i8Three, TBXv16i8Three, + TBXv8i8Four, TBXv16i8Four)>; + + +//========== +// SIMD complex number 
arithmetic instructions +//========== + +def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FCADDv", "^FCMLAv")>; + +//========== +// SIMD cryptographic instructions +//========== +// 3,4 on IMLA, CRYP +def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^AES[DE]", + "^PMULLv", + "^SM3(TT1|TT2)(A|B)")>; + +// 2,4 on CRYP +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^AESI?MC", + "^EOR3", + "^RAX1", + "^XAR", + "^BCAX", + "^SM3SS1", + "^SM3PART(W1|W2)")>; +// 5,1 on CRYP +def : InstRW<[ORYONWrite_5Cyc_FP1], (instregex "^SM4E", + "^SM4EKEY")>; + +// 2,1 on CRYP +def : InstRW<[ORYONWrite_2Cyc_FP1], (instregex "^SHA1(H|SU0|SU1)", + "^SHA256SU0", + "^SHA512(SU0|SU1)")>; + +// 3,1 on CRYP +def : InstRW<[ORYONWrite_3Cyc_FP1], (instregex "^SHA256SU1", + "^SHA512(H|H2)")>; + +// 4,0.25 on CRYP +def : InstRW<[ORYONWrite_4Cyc_FP1_RC4], (instregex "^SHA1(C|P|M)", + "^SHA256(H|H2)")>; + +//========== +// SIMD v8.6 instructions +//========== +// 4,2 on IMLA +def : InstRW<[ORYONWrite_4Cyc_FP0123_RC], (instregex "^(S|U|US)MMLA$")>; + +// 4,0.5 on IMLA FIX ME!!! +def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], (instregex "^BFMMLA$")>; + +// FIX ME !!! 
no definition in the BIG Chart yet +def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], (instregex "^BFMLAL(B|T)")>; + +// 3,4 +def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^(US|SU)DOTv")>; + +// 3,1 +def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^BF(16)?DOTv")>; + +// 3,4 +def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^BFCVT(N|N2)?$")>; + + +} // SchedModel = OryonModel diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 5d185fcaefc4..7d2df695ccb2 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -305,6 +305,13 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) { PrefLoopAlignment = Align(64); MaxInterleaveFactor = 4; break; + case Oryon: + CacheLineSize = 64; + PrefFunctionAlignment = Align(16); + MaxInterleaveFactor = 4; + PrefetchDistance = 128; + MinPrefetchStride = 1024; + break; } if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize) diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 834f4536f93a..c57632dd5665 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -302,6 +302,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver .Case("0xc00", "falkor") .Case("0xc01", "saphira") + .Case("0x001", "oryon-1") .Default("generic"); if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. 
// The Exynos chips have a convoluted ID scheme that doesn't seem to follow diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp index 6aa1d7a087eb..61921a99e171 100644 --- a/llvm/unittests/TargetParser/Host.cpp +++ b/llvm/unittests/TargetParser/Host.cpp @@ -125,6 +125,9 @@ TEST(getLinuxHostCPUName, AArch64) { EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0xc0\n" "CPU part : 0xac5"), "ampere1b"); + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n" + "CPU part : 0x001"), + "oryon-1"); // MSM8992/4 weirdness StringRef MSM8992ProcCpuInfo = R"( diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index 816aea44a9bc..c4b95c544c65 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -1815,11 +1815,23 @@ INSTANTIATE_TEST_SUITE_P( {AArch64::AEK_CRC, AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_FP, AArch64::AEK_SIMD, AArch64::AEK_FP16, AArch64::AEK_RAS, AArch64::AEK_LSE, AArch64::AEK_RDM}), - "8.2-A")), + "8.2-A"), + ARMCPUTestParams<AArch64::ExtensionBitset>( + "oryon-1", "armv8.6-a", "crypto-neon-fp-armv8", + (AArch64::ExtensionBitset( + {AArch64::AEK_CRC, AArch64::AEK_FP, AArch64::AEK_PAUTH, + AArch64::AEK_FCMA, AArch64::AEK_JSCVT, AArch64::AEK_SIMD, + AArch64::AEK_RAS, AArch64::AEK_LSE, AArch64::AEK_RDM, + AArch64::AEK_RCPC, AArch64::AEK_DOTPROD, AArch64::AEK_SM4, + AArch64::AEK_SHA3, AArch64::AEK_BF16, AArch64::AEK_SHA2, + AArch64::AEK_AES, AArch64::AEK_I8MM, AArch64::AEK_RAND, + AArch64::AEK_PROFILE, AArch64::AEK_CRYPTO})), + "8.6-A")), + ARMCPUTestParams<AArch64::ExtensionBitset>::PrintToStringParamName); // Note: number of CPUs includes aliases. 
-static constexpr unsigned NumAArch64CPUArchs = 76; +static constexpr unsigned NumAArch64CPUArchs = 77; TEST(TargetParserTest, testAArch64CPUArchList) { SmallVector<StringRef, NumAArch64CPUArchs> List; >From 241be3c0c76553af138ded968eff942ba0bd4838 Mon Sep 17 00:00:00 2001 From: Wei Zhao <wez...@qti.qualcomm.com> Date: Tue, 7 May 2024 16:44:45 +0000 Subject: [PATCH 2/2] Code Review Adjustments -- turn on duplication def instruction ON, and remove some engineering notes --- llvm/lib/Target/AArch64/AArch64SchedOryon.td | 83 +++----------------- 1 file changed, 10 insertions(+), 73 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64SchedOryon.td b/llvm/lib/Target/AArch64/AArch64SchedOryon.td index 063cc8681e2b..e54c46ae69d2 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedOryon.td +++ b/llvm/lib/Target/AArch64/AArch64SchedOryon.td @@ -33,8 +33,6 @@ def OryonModel : SchedMachineModel { MTEUnsupported.F, PAUnsupported.F, [HasPAuth, HasCSSC]); - // FIXME: Remove when all errors have been fixed. - let FullInstRWOverlapCheck = 0; } let SchedModel = OryonModel in { @@ -289,7 +287,7 @@ def ORYONWrite_7Cyc_3Uops_I2 : SchedWriteRes<[ORYONI2]> { let NumMicroOps = 3; } -// 8 (7+1+1) cycles on I2 and I0/I1, I0. Authentication branch instructions +// 9 (7+1+1) cycles on I2 and I0/I1, I0. Authentication branch instructions // these instructions are broken down to three uops // a. PtrAuth on pipe 2 taking 7 cycles // b. 
Link Register Update on pipes 0 and 1 taking 1 cycle @@ -677,9 +675,6 @@ SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123, ORYONFP0123]> { // Arithmetic Instructions //--- -// Table on P74 -//============= - //1, 1, 6 def : InstRW<[ORYONWrite_1Cyc_I012345], (instregex "^ADD(W|X)r(i|r|x)", "^SUB(W|X)r(i|r|x)")>; @@ -714,9 +709,6 @@ def : InstRW<[ORYONWrite_1Cyc_I0123], //Compare Instruciton //--- -// Table on P75 -//============= - // We have CCMP, CCMN as LLVM DAG node // CMP is an alias of SUBS as above // CMN is an alias of ADDS as above @@ -731,8 +723,6 @@ def : InstRW<[ORYONWrite_1Cyc_I0123], // Branch //--- -// Table on P76 -//============= def : InstRW<[ORYONWrite_1Cyc_NONE], (instrs B)>; def : InstRW<[ORYONWrite_1Cyc_I01], (instrs BL)>; def : InstRW<[ORYONWrite_1Cyc_I01], @@ -740,7 +730,6 @@ def : InstRW<[ORYONWrite_1Cyc_I01], def : InstRW<[ORYONWrite_1Cyc_I0], (instrs BR, BLR)>; def : InstRW<[ORYONWrite_1Cyc_I0], (instrs RET)>; -// Table on p47/p76 // 3 uOp, 1 cycle for branch, 7 cycle for Authentication, // 1 cycle for updating link register // V8.3a PAC @@ -749,17 +738,11 @@ def : InstRW<[ORYONWrite_9Cyc_I012], BRAA, BRAAZ, BRAB, BRABZ)>; def : InstRW<[ORYONWrite_9Cyc_I012], (instrs RETAA, RETAB, ERETAA, ERETAB)>; -// we temporarily put it here. It needs fix. -// should LSU get involved? Need check? 
-// Should be a combined additive value of load and pauth def : InstRW<[ORYONWrite_7Cyc_3Uops_I2], (instregex "^LDRAA", "^LDRAB")>; // Logical Instructions //--- -// Table on P77 -//============= - //1,1,4 TST is an alias of ANDS def : InstRW<[ORYONWrite_1Cyc_I0123], (instregex "^ANDS(W|X)r(i|r|x)", "^BICS(W|X)r(i|r|x)")>; @@ -784,9 +767,6 @@ def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC], // Shift Instructions //--- -// Table on P78 -//============= - //1,1,6 def : InstRW<[ORYONWrite_1Cyc_I012345], (instregex "^ASRV(W|X)r", "^LSLV(W|X)r", @@ -797,9 +777,6 @@ def : InstRW<[ORYONWrite_1Cyc_I012345], // Move-Data Bit-field and Sign_Extension Instructions //--- -// Table on P78 -//============= - //1,1,6 def : InstRW<[ORYONWrite_1Cyc_I012345], (instregex "^MOVK(W|X)i", "^MOVN(W|X)i", @@ -814,10 +791,6 @@ def : InstRW<[ORYONWrite_1Cyc_I012345], (instrs COPY)>; // Reverse Instructions //--- -// Table on P79 -//============= - - //1,1,6 def : InstRW<[ORYONWrite_1Cyc_I012345], (instregex "^RBIT(W|X)r", "^REV(16|32|64)?(W|X)r")>; @@ -827,9 +800,6 @@ def : InstRW<[ORYONWrite_1Cyc_I012345], // Flag Manipulate Instructions //--- -// Table on P79 -//============= - //1,1,4 def : InstRW<[ORYONWrite_1Cyc_I0123], (instregex "^SETF8", "^SETF16", "^CFINV")>; @@ -838,9 +808,6 @@ def : InstRW<[ORYONWrite_1Cyc_I0123], // Miscellaneous Instructions //--- -// Table on P80 -//============= - //1,1,6 def : InstRW<[ORYONWrite_1Cyc_I012345], (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$", "^EXTR(W|X)rri")>; @@ -850,10 +817,6 @@ def : InstRW<[ORYONWrite_1Cyc_I012345], // Multiply Instructions //--- -// Table on P81 -//============= - - //1,3,2 def : InstRW<[ORYONWrite_3Cyc_I45], (instregex "^MADD(W|X)rrr", "^MSUB(W|X)rrr", @@ -864,9 +827,6 @@ def : InstRW<[ORYONWrite_3Cyc_I45], // Divide Instructions //--- -// Table on P81 -//============= - def : InstRW<[ORYONWrite_7Cyc_I2_RC], (instregex "^(S|U)DIVWr")>; @@ -1429,10 +1389,6 @@ def : InstRW<[ORYONWrite_V2V_6Cyc_FP01], (instregex "^FJCVTZS")>; 
def : InstRW<[ORYONWrite_V2V_3Cyc_FP0123], (instregex "^FCVT[HSD][HSD]r", "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; -// floating comparison -def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMAX(NM)?")>; -def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMIN(NM)?")>; - // floating comparison write to NZCV def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCMP(E)?[HSD]r[ir]")>; def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCCMP(E)?[HSD]rr")>; @@ -1441,8 +1397,7 @@ def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCCMP(E)?[HSD]rr")>; def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FCSEL")>; // floating multiply-add -def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^(F|FN)MADD", "^(F|FN)MSUB", - "^(F|FN)MUL")>; +def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^(F|FN)MADD", "^(F|FN)MSUB")>; // floating unary, cycle/throughput? xls row14 def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^F(ABS|NEG)[SD]r")>; @@ -1493,14 +1448,6 @@ def : InstRW<[ORYONWrite_6Cyc_FP3], (instregex "^FDIVv.*16$")>; def : InstRW<[ORYONWrite_7Cyc_FP3], (instregex "^FDIVv.*32$")>; def : InstRW<[ORYONWrite_9Cyc_FP3], (instregex "^FDIVv.*64$")>; -// FMAX, FMIN -def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMAXv", "^FMAXNMv", - "^FMINv", "^FMINNMv")>; - -// floating multiply-add this is 4/3 need to fine tune -def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMLA(L|L2)?v", - "^FMLS(L|L2)?v")>; - def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMUL(X)?v", "^FRECPSv", "^FRSQRTSv")>; @@ -1508,14 +1455,12 @@ def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^MLAv","^MLSv", "^MULv", "^PMULv", "UABAv")>; def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "SABAv", "SABDv", - "^EXTv", "^TRN(1|2)v", "^(SH|UH)(ADD|SUB)v", "^S(MAX|MIN)v", "^(SQ|UQ)(ADD|SUB)v", - // no such instruction "^SQ(R)?((MULH|MLAH|MLSH)v", "^(SQ|SQR|UQ|UQR)SHLv", "^(SR|UR)HADDv", - "^(S|SR|U|UR)SHLv", + "^(SR|UR)SHLv", "^UABDv", "^U(MAX|MIN)v")>; // IMAX or UMAX in the above line @@ -1554,7 +1499,7 @@ def : 
InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ABSv", "^CLSv","^CLZv", "^CN "^NEGv", "^NOTv", "^RBITv", "^REV(16|32|64)v", "^SQ(ABS|NEG)v", "^SQ(XT|XTU)(N|N2)v", - "^(SU|US)QADDv", "^(S|U)SHL(L|L2)v", + "^(SU|US)QADDv", "^UQXT(N|N2)v", "^XTN2?v")>; def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^FCVT(L|L2|N|N2|XN|XN2)v", @@ -1575,13 +1520,9 @@ def : InstRW<[ORYONWrite_13Cyc_FP3_RC], (instregex "^FSQRTv.*64$")>; // SIMD binary elememt arithmetic instructions //========== -def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMLAv", "^FMLSv","^FMULX?v", - "^F(MLA|MLS)(L|L2)v")>; - +def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMLAv", "^FMLSv")>; -def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^MLAv", "^MLSv", "^MULv", - "^(S|U|SQD)(MLA|MLS|MUL)(L|L2)v", - "^SQDMULHv", +def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^SQDMULHv", "^SQRD(MLA|MLS|MUL)Hv")>; //========== @@ -1595,20 +1536,17 @@ def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^EXTv", "^TRN(1|2)v", // SIMD immediate instructions //========== -def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMOVv", "^MOVIv", - "^MVNIv")>; +def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^MOVIv", "^MVNIv")>; //========== // SIMD shift(immediate) instructions //========== -// "^(S|U)XT(L|L2)v" counted as unary instruction as SHL def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^RSHR(N|N2)v", "^SHLv", "^(SHL|SHR)(N|N2)v", "^SLIv", "^(SQ|SQR)SHR(U)?(N|N2)v", "^(UQ|UQR)SHR(N|N2)v", - "^SQSHL(U)?v", - "^UQSHLv", + "^SQSHLUv", "^SRIv", "^(S|SR|U|UR)SHRv", "^(S|SR|U|UR)SRAv", @@ -1674,7 +1612,6 @@ def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FCADDv", "^FCMLAv")>; //========== // 3,4 on IMLA, CRYP def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^AES[DE]", - "^PMULLv", "^SM3(TT1|TT2)(A|B)")>; // 2,4 on CRYP @@ -1708,10 +1645,10 @@ def : InstRW<[ORYONWrite_4Cyc_FP1_RC4], (instregex "^SHA1(C|P|M)", // 4,2 on IMLA def : InstRW<[ORYONWrite_4Cyc_FP0123_RC], (instregex "^(S|U|US)MMLA$")>; -// 4,0.5 on 
IMLA FIX ME!!! +// 4,0.5 on IMLA def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], (instregex "^BFMMLA$")>; -// FIX ME !!! no definition in the BIG Chart yet +// 4,0.5 on IMLA def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], (instregex "^BFMLAL(B|T)")>; // 3,4 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits