llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-aarch64 Author: Franklin (FLZ101) <details> <summary>Changes</summary> References: * Arm Neoverse N3 Software Optimization Guide * Arm A64 Instruction Set for A-profile architecture --- Patch is 1.64 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106371.diff 14 Files Affected: - (modified) clang/test/Misc/target-invalid-cpu-note/arm.c (+1) - (modified) llvm/include/llvm/TargetParser/ARMTargetParser.def (+3) - (modified) llvm/lib/Target/AArch64/AArch64.td (+1) - (modified) llvm/lib/Target/AArch64/AArch64Processors.td (+1-1) - (added) llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td (+2359) - (modified) llvm/test/CodeGen/AArch64/cpus.ll (+1) - (added) llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-basic-instructions.s (+3725) - (added) llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-mte-instructions.s (+350) - (added) llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-neon-instructions.s (+3236) - (added) llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-sve-instructions.s (+10262) - (added) llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-writeback.s (+5320) - (modified) llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp (+4) - (modified) llvm/unittests/TargetParser/Host.cpp (+3) - (modified) llvm/unittests/TargetParser/TargetParserTest.cpp (+1-1) ``````````diff diff --git a/clang/test/Misc/target-invalid-cpu-note/arm.c b/clang/test/Misc/target-invalid-cpu-note/arm.c index 27608cc6eb29fc..278cd76bdf170e 100644 --- a/clang/test/Misc/target-invalid-cpu-note/arm.c +++ b/clang/test/Misc/target-invalid-cpu-note/arm.c @@ -88,6 +88,7 @@ // CHECK-SAME: {{^}}, cortex-x1c // CHECK-SAME: {{^}}, neoverse-n1 // CHECK-SAME: {{^}}, neoverse-n2 +// CHECK-SAME: {{^}}, neoverse-n3 // CHECK-SAME: {{^}}, neoverse-v1 // CHECK-SAME: {{^}}, cyclone // CHECK-SAME: {{^}}, exynos-m3 diff --git a/llvm/include/llvm/TargetParser/ARMTargetParser.def b/llvm/include/llvm/TargetParser/ARMTargetParser.def index e5a1ce54fd46a7..bf4ef09303d1e8 100644 --- a/llvm/include/llvm/TargetParser/ARMTargetParser.def +++ b/llvm/include/llvm/TargetParser/ARMTargetParser.def @@ -380,6 +380,9 @@ ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM_CPU_NAME("neoverse-n2", ARMV9A, FK_NEON_FP_ARMV8, false, (ARM::AEK_BF16 | ARM::AEK_DOTPROD | ARM::AEK_FP16FML | ARM::AEK_I8MM | ARM::AEK_RAS | ARM::AEK_SB )) +ARM_CPU_NAME("neoverse-n3", ARMV9_2A, FK_NEON_FP_ARMV8, false, + (ARM::AEK_BF16 | ARM::AEK_DOTPROD | ARM::AEK_FP16FML | + ARM::AEK_I8MM | ARM::AEK_RAS | ARM::AEK_SB )) ARM_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_RAS | ARM::AEK_FP16 | ARM::AEK_BF16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 251318fe4b5efd..9378081e675a85 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -112,6 +112,7 @@ include "AArch64SchedAmpere1.td" include "AArch64SchedAmpere1B.td" include "AArch64SchedNeoverseN1.td" include "AArch64SchedNeoverseN2.td" +include "AArch64SchedNeoverseN3.td" include "AArch64SchedNeoverseV1.td" include "AArch64SchedNeoverseV2.td" include "AArch64SchedOryon.td" diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index 84d8cae3a0a5d1..8944eb88b4a4ff 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -1127,7 +1127,7 @@ def : ProcessorModel<"neoverse-n1", NeoverseN1Model, def : ProcessorModel<"neoverse-n2", NeoverseN2Model, ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>; def : ProcessorAlias<"cobalt-100", "neoverse-n2">; -def : ProcessorModel<"neoverse-n3", NeoverseN2Model, +def : ProcessorModel<"neoverse-n3", NeoverseN3Model, ProcessorFeatures.NeoverseN3, [TuneNeoverseN3]>; def : ProcessorModel<"neoverse-512tvb", NeoverseV1Model, ProcessorFeatures.Neoverse512TVB, [TuneNeoverse512TVB]>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td new file mode 100644 index 00000000000000..68568f6ec7ac78 --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td @@ -0,0 +1,2359 @@ +//=- AArch64SchedNeoverseN3.td - NeoverseN3 Scheduling Defs --*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the scheduling model for the Arm Neoverse N3 processors. +// +//===----------------------------------------------------------------------===// + +def NeoverseN3Model : SchedMachineModel { + let IssueWidth = 10; // Micro-ops dispatched at a time. + let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer. NOTE: Copied from N2. + let LoadLatency = 4; // Optimistic load latency. + let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2. + let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57. + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, + [HasSVE2p1, HasPAuthLR, HasCPA, HasCSSC]); +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Neoverse N3. +// Instructions are first fetched and then decoded into internal Macro-OPerations +// (MOPs). From there, the MOPs proceed through register renaming and dispatch stages. +// A MOP can be split into two Micro-OPerations (µOPs) further down the pipeline +// after the decode stage. Once dispatched, µOPs wait for their operands and issue +// out-of-order to one of thirteen issue pipelines. Each issue pipeline can accept +// one µOP per cycle. + +let SchedModel = NeoverseN3Model in { + +// Define the (13) issue ports. +def N3UnitB : ProcResource<2>; // Branch 0/1 +def N3UnitS : ProcResource<2>; // Integer Single-Cycle 0/1 +def N3UnitM0 : ProcResource<1>; // Integer Single/Multi-Cycle 0 +def N3UnitM1 : ProcResource<1>; // Integer Single/Multi-Cycle 1 +def N3UnitV0 : ProcResource<1>; // FP/ASIMD 0 +def N3UnitV1 : ProcResource<1>; // FP/ASIMD 1 +def N3UnitD : ProcResource<2>; // Integer Store data 0/1 +def N3UnitL01 : ProcResource<2>; // Load/Store 0/1 +def N3UnitL2 : ProcResource<1>; // Load 2 + +def N3UnitI : ProcResGroup<[N3UnitS, N3UnitM0, N3UnitM1]>; +def N3UnitM : ProcResGroup<[N3UnitM0, N3UnitM1]>; +def N3UnitL : ProcResGroup<[N3UnitL01, N3UnitL2]>; +def N3UnitV : ProcResGroup<[N3UnitV0, N3UnitV1]>; + +//===----------------------------------------------------------------------===// + +def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 1, [WriteIM32, WriteIM64]>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadST, 0>; +def : ReadAdvance<ReadVLD, 0>; + +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } +def : WriteRes<WriteFDiv, []> { let Unsupported = 1; } +def : WriteRes<WriteBarrier, []> { let Unsupported = 1; } +def : WriteRes<WriteHint, []> { let Unsupported = 1; } + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Neoverse N3. + +//===----------------------------------------------------------------------===// +// Define generic 0 micro-op types + +def N3Write_0c : SchedWriteRes<[]> { + let Latency = 0; + let NumMicroOps = 0; +} + +def N3Write_4c : SchedWriteRes<[]> { + let Latency = 4; + let NumMicroOps = 0; +} + +//===----------------------------------------------------------------------===// +// Define generic 1 micro-op types + +def N3Write_1c_1B : SchedWriteRes<[N3UnitB]> { let Latency = 1; } +def N3Write_1c_1I : SchedWriteRes<[N3UnitI]> { let Latency = 1; } +def N3Write_2c_1M : SchedWriteRes<[N3UnitM]> { let Latency = 2; } +def N3Write_2c_1M0 : SchedWriteRes<[N3UnitM0]> { let Latency = 2; } +def N3Write_3c_1M : SchedWriteRes<[N3UnitM]> { let Latency = 3; } +def N3Write_1c_1M : SchedWriteRes<[N3UnitM]> { let Latency = 1; } +def N3Write_4c_1M : SchedWriteRes<[N3UnitM]> { let Latency = 4; } +def N3Write_1c_1S : SchedWriteRes<[N3UnitS]> { let Latency = 1; } +def N3Write_4c_1L : SchedWriteRes<[N3UnitL]> { let Latency = 4; } +def N3Write_2c_1V : SchedWriteRes<[N3UnitV]> { let Latency = 2; } +def N3Write_5c_1V0 : SchedWriteRes<[N3UnitV0]> { let Latency = 5; } +def N3Write_7c_1V0 : SchedWriteRes<[N3UnitV0]> { let Latency = 7; } +def N3Write_12c_1V0 : SchedWriteRes<[N3UnitV0]> { let Latency = 12; } +def N3Write_3c_1V : SchedWriteRes<[N3UnitV]> { let Latency = 3; } +def N3Write_4c_1V : SchedWriteRes<[N3UnitV]> { let Latency = 4; } +def N3Write_3c_1V0 : SchedWriteRes<[N3UnitV0]> { let Latency = 3; } +def N3Write_3c_1M0 : SchedWriteRes<[N3UnitM0]> { let Latency = 3; } +def N3Write_6c_1L : SchedWriteRes<[N3UnitL]> { let Latency = 6; } +def N3Write_4c_1V1 : SchedWriteRes<[N3UnitV1]> { let Latency = 4; } +def N3Write_3c_1V1 : SchedWriteRes<[N3UnitV1]> { let Latency = 3; } +def N3Write_4c_1V0 : SchedWriteRes<[N3UnitV0]> { let Latency = 4; } +def N3Write_2c_1V0 : SchedWriteRes<[N3UnitV0]> { let Latency = 2; } +def N3Write_2c_1V1 : SchedWriteRes<[N3UnitV1]> { let Latency = 2; } +def N3Write_5c_1V : SchedWriteRes<[N3UnitV]> { let Latency = 5; } +def N3Write_1c_1L01 : SchedWriteRes<[N3UnitL01]> { let Latency = 1; } + +def N3Write_12c_1M0 : SchedWriteRes<[N3UnitM0]> { + let Latency = 12; + let ReleaseAtCycles = [12]; +} + +def N3Write_20c_1M0 : SchedWriteRes<[N3UnitM0]> { + let Latency = 20; + let ReleaseAtCycles = [20]; +} + +//===----------------------------------------------------------------------===// +// Define generic 2 micro-op types + +def N3Write_1c_2I : SchedWriteRes<[N3UnitI]> { + let Latency = 1; + let NumMicroOps = 2; + let ReleaseAtCycles = [2]; +} + +def N3Write_1c_1B_1S : SchedWriteRes<[N3UnitB, N3UnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def N3Write_2c_1M_1B : SchedWriteRes<[N3UnitM, N3UnitB]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def N3Write_5c_1L_1S : SchedWriteRes<[N3UnitL, N3UnitS]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def N3Write_4c_2L : SchedWriteRes<[N3UnitL]> { + let Latency = 4; + let NumMicroOps = 2; + let ReleaseAtCycles = [2]; +} + +def N3Write_3c_1L01_1V : SchedWriteRes<[N3UnitL01, N3UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def N3Write_1c_1L01_1D : SchedWriteRes<[N3UnitL01, N3UnitD]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def N3Write_5c_1L_1I : SchedWriteRes<[N3UnitL, N3UnitI]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def N3Write_6c_2L : SchedWriteRes<[N3UnitL]> { + let Latency = 6; + let NumMicroOps = 2; + let ReleaseAtCycles = [2]; +} + +def N3Write_2c_1L01_1V : SchedWriteRes<[N3UnitL01, N3UnitV]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def N3Write_6c_2V1 : SchedWriteRes<[N3UnitV1]> { + let Latency = 6; + let NumMicroOps = 2; + let ReleaseAtCycles = [2]; +} + +def N3Write_4c_2V0 : SchedWriteRes<[N3UnitV0]> { + let Latency = 4; + let NumMicroOps = 2; + let ReleaseAtCycles = [2]; +} + +def N3Write_8c_2V0 : SchedWriteRes<[N3UnitV0]> { + let Latency = 8; + let NumMicroOps = 2; + let ReleaseAtCycles = [2]; +} + +def N3Write_13c_2V0 : SchedWriteRes<[N3UnitV0]> { + let Latency = 13; + let NumMicroOps = 2; + let ReleaseAtCycles = [2]; +} + +def N3Write_4c_2V : SchedWriteRes<[N3UnitV]> { + let Latency = 4; + let NumMicroOps = 2; + let ReleaseAtCycles = [2]; +} + +def N3Write_2c_2V : SchedWriteRes<[N3UnitV]> { + let Latency = 2; + let NumMicroOps = 2; + let ReleaseAtCycles = [2]; +} + +def N3Write_8c_1L_1V : SchedWriteRes<[N3UnitL, N3UnitV]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def N3Write_2c_1V_1L01 : SchedWriteRes<[N3UnitV, N3UnitL01]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def N3Write_5c_2V0 : SchedWriteRes<[N3UnitV0]> { + let Latency = 5; + let NumMicroOps = 2; + let ReleaseAtCycles = [2]; +} + +def N3Write_6c_2V0 : SchedWriteRes<[N3UnitV0]> { + let Latency = 6; + let NumMicroOps = 2; + let ReleaseAtCycles = [2]; +} + +def N3Write_7c_1L_1M : SchedWriteRes<[N3UnitL, N3UnitM]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def N3Write_8c_1V_1L : SchedWriteRes<[N3UnitV, N3UnitL]> { + let Latency = 8; + let NumMicroOps = 2; +} + +//===----------------------------------------------------------------------===// +// Define generic 3 micro-op types + +def N3Write_5c_1M0_2V : SchedWriteRes<[N3UnitM0, N3UnitV]> { + let Latency = 5; + let NumMicroOps = 3; + let ReleaseAtCycles = [1, 2]; +} + +def N3Write_5c_1V1_2V : SchedWriteRes<[N3UnitV1, N3UnitV]> { + let Latency = 5; + let NumMicroOps = 3; + let ReleaseAtCycles = [1, 2]; +} + +def N3Write_6c_3V : SchedWriteRes<[N3UnitV]> { + let Latency = 6; + let NumMicroOps = 3; + let ReleaseAtCycles = [3]; +} + +def N3Write_4c_3V : SchedWriteRes<[N3UnitV]> { + let Latency = 4; + let NumMicroOps = 3; + let ReleaseAtCycles = [3]; +} + +def N3Write_6c_3L : SchedWriteRes<[N3UnitL]> { + let Latency = 6; + let NumMicroOps = 3; + let ReleaseAtCycles = [3]; +} + +def N3Write_8c_2L_1V : SchedWriteRes<[N3UnitL, N3UnitV]> { + let Latency = 8; + let NumMicroOps = 3; + let ReleaseAtCycles = [2, 1]; +} + +def N3Write_8c_1M0_2V : SchedWriteRes<[N3UnitM0, N3UnitV]> { + let Latency = 8; + let NumMicroOps = 3; + let ReleaseAtCycles = [1, 2]; +} + +def N3Write_7c_2V_1V1 : SchedWriteRes<[N3UnitV, N3UnitV1]> { + let Latency = 7; + let NumMicroOps = 3; + let ReleaseAtCycles = [2, 1]; +} + +def N3Write_5c_2V_1V1 : SchedWriteRes<[N3UnitV, N3UnitV1]> { + let Latency = 5; + let NumMicroOps = 3; + let ReleaseAtCycles = [2, 1]; +} + +//===----------------------------------------------------------------------===// +// Define generic 4 micro-op types + +def N3Write_5c_1M_1L_2I : SchedWriteRes<[N3UnitM, N3UnitL, N3UnitI]> { + let Latency = 5; + let NumMicroOps = 4; + let ReleaseAtCycles = [1, 1, 2]; +} + +def N3Write_4c_2I_2L : SchedWriteRes<[N3UnitI, N3UnitL]> { + let Latency = 4; + let NumMicroOps = 4; + let ReleaseAtCycles = [2, 2]; +} + +def N3Write_1c_1L01_1D_2I : SchedWriteRes<[N3UnitL01, N3UnitD, N3UnitI]> { + let Latency = 1; + let NumMicroOps = 4; + let ReleaseAtCycles = [1, 1, 2]; +} + +def N3Write_2c_2I_1L01_1V : SchedWriteRes<[N3UnitI, N3UnitL01, N3UnitV]> { + let Latency = 2; + let NumMicroOps = 4; + let ReleaseAtCycles = [2, 1, 1]; +} + +def N3Write_6c_4V0 : SchedWriteRes<[N3UnitV0]> { + let Latency = 6; + let NumMicroOps = 4; + let ReleaseAtCycles = [4]; +} + +def N3Write_8c_4V0 : SchedWriteRes<[N3UnitV0]> { + let Latency = 8; + let NumMicroOps = 4; + let ReleaseAtCycles = [4]; +} + +def N3Write_10c_4V0 : SchedWriteRes<[N3UnitV0]> { + let Latency = 10; + let NumMicroOps = 4; + let ReleaseAtCycles = [4]; +} + +def N3Write_6c_4V : SchedWriteRes<[N3UnitV]> { + let Latency = 6; + let NumMicroOps = 4; + let ReleaseAtCycles = [4]; +} + +def N3Write_7c_4L : SchedWriteRes<[N3UnitL]> { + let Latency = 7; + let NumMicroOps = 4; + let ReleaseAtCycles = [4]; +} + +def N3Write_2c_2L01_2V : SchedWriteRes<[N3UnitL01, N3UnitV]> { + let Latency = 2; + let NumMicroOps = 4; + let ReleaseAtCycles = [2, 2]; +} + +def N3Write_4c_2V_2L01 : SchedWriteRes<[N3UnitV, N3UnitL01]> { + let Latency = 4; + let NumMicroOps = 4; + let ReleaseAtCycles = [2, 2]; +} + +def N3Write_2c_2V_2L01 : SchedWriteRes<[N3UnitV, N3UnitL01]> { + let Latency = 2; + let NumMicroOps = 4; + let ReleaseAtCycles = [2, 2]; +} + +def N3Write_8c_4V : SchedWriteRes<[N3UnitV]> { + let Latency = 8; + let NumMicroOps = 4; + let ReleaseAtCycles = [4]; +} + +def N3Write_2c_1L01_2I_1V : SchedWriteRes<[N3UnitL01, N3UnitI, N3UnitV]> { + let Latency = 2; + let NumMicroOps = 4; + let ReleaseAtCycles = [1, 2, 1]; +} + +//===----------------------------------------------------------------------===// +// Define generic 5 micro-op types + +def N3Write_7c_2M_1M0_2V : SchedWriteRes<[N3UnitM, N3UnitM0, N3UnitV]> { + let Latency = 7; + let NumMicroOps = 5; + let ReleaseAtCycles = [2, 1, 2]; +} + +//===----------------------------------------------------------------------===// +// Define generic 6 micro-op types + +def N3Write_4c_3V_3L01 : SchedWriteRes<[N3UnitV, N3UnitL01]> { + let Latency = 4; + let NumMicroOps = 6; + let ReleaseAtCycles = [3, 3]; +} + +def N3Write_2c_3V_3L01 : SchedWriteRes<[N3UnitV, N3UnitL01]> { + let Latency = 2; + let NumMicroOps = 6; + let ReleaseAtCycles = [3, 3]; +} + +def N3Write_8c_4V_2V1 : SchedWriteRes<[N3UnitV, N3UnitV1]> { + let Latency = 8; + let NumMicroOps = 6; + let ReleaseAtCycles = [4, 2]; +} + +def N3Write_4c_3L01_3V : SchedWriteRes<[N3UnitL01, N3UnitV]> { + let Latency = 4; + let NumMicroOps = 6; + let ReleaseAtCycles = [3, 3]; +} + +def N3Write_3c_3L01_3V : SchedWriteRes<[N3UnitL01, N3UnitV]> { + let Latency = 3; + let NumMicroOps = 6; + let ReleaseAtCycles = [3, 3]; +} + +def N3Write_6c_3L01_3V : SchedWriteRes<[N3UnitL01, N3UnitV]> { + let Latency = 6; + let NumMicroOps = 6; + let ReleaseAtCycles = [3, 3]; +} + +//===----------------------------------------------------------------------===// +// Define generic 7 micro-op types + +def N3Write_8c_4L_3V : SchedWriteRes<[N3UnitL, N3UnitV]> { + let Latency = 8; + let NumMicroOps = 7; + let ReleaseAtCycles = [4, 3]; +} + +def N3Write_10c_4L_3V : SchedWriteRes<[N3UnitL, N3UnitV]> { + let Latency = 10; + let NumMicroOps = 7; + let ReleaseAtCycles = [4, 3]; +} + +def N3Write_8c_3V_4L : SchedWriteRes<[N3UnitV, N3UnitL]> { + let Latency = 8; + let NumMicroOps = 7; + let ReleaseAtCycles = [3, 4]; +} + +//===----------------------------------------------------------------------===// +// Define generic 8 micro-op types + +def N3Write_12c_8V0 : SchedWriteRes<[N3UnitV0]> { + let Latency = 12; + let NumMicroOps = 8; + let ReleaseAtCycles = [8]; +} + +def N3Write_4c_4V_4L01 : SchedWriteRes<[N3UnitV, N3UnitL01]> { + let Latency = 4; + let NumMicroOps = 8; + let ReleaseAtCycles = [4, 4]; +} + +def N3Write_8c_8V0 : SchedWriteRes<[N3UnitV0]> { + let Latency = 8; + let NumMicroOps = 8; + let ReleaseAtCycles = [8]; +} + +def N3Write_16c_8V : SchedWriteRes<[N3UnitV]> { + let Latency = 16; + let NumMicroOps = 8; + let ReleaseAtCycles = [8]; +} + +def N3Write_3c_4L01_4V : SchedWriteRes<[N3UnitL01, N3UnitV]> { + let Latency = 3; + let NumMicroOps = 8; + let ReleaseAtCycles = [4, 4]; +} + +//===----------------------------------------------------------------------===// +// Define generic 10 micro-op types + +def N3Write_8c_6L_4V : SchedWriteRes<[N3UnitL, N3UnitV]> { + let Latency = 8; + let NumMicroOps = 10; + let ReleaseAtCycles = [6, 4]; +} + +def N3Write_8c_4V_6L : SchedWriteRes<[N3UnitV, N3UnitL]> { + let Latency = 8; + let NumMicroOps = 10; + let ReleaseAtCycles = [4, 6]; +} + +//===----------------------------------------------------------------------===// +// Define generic 12 micro-op types + +def N3Write_12c_5V_7L : SchedWriteRes<[N3UnitV, N3UnitL]> { + let Latency = 12; + let NumMicroOps = 12; + let ReleaseAtCycles = [5, 7]; +} + +def N3Write_4c_3L01_6I_3V : SchedWriteRes<[N3UnitL01, N3UnitI, N3UnitV]> { + let Latency = 4; + let NumMicroOps = 12; + let ReleaseAtCycles = [3, 6, 3]; +} + +def N3Write_3c_3L01_6I_3V : SchedWriteRes<[N3UnitL01, N3UnitI, N3UnitV]> { + let Latency = 3; + let NumMicroOps = 12; + let ReleaseAtCycles = [3, 6, 3]; +} + +def N3Write_6c_3L01_6I_3V : SchedWriteRes<[N3UnitL01, N3UnitI, N3UnitV]> { + let Latency = 6; + let NumMicroOps = 12; + let ReleaseAtCycles = [3, 6, 3]; +} + +//===----------------------------------------------------------------------===// +// Define generic 13 micro-op types + +def N3Write_9c_3V_4L_6I : SchedWriteRes<[N3UnitV, N3UnitL, N3UnitI]> { + let Latency = 9; + let NumMicroOps = 13; + let ReleaseAtCycles = [3, 4, 6]; +} + +//===----------------------------------------------------------------------===// +// Define generic 15 micro-op types + +def N3Write_10c_6V_9L : SchedWriteRes<[N3UnitV, N3UnitL]> { + let Latency = 10; + let NumMicroOps = 15; + let ReleaseAtCycles = [6, 9]; +} + +//===----------------------------------------------------------------------===// +// Define generic 16 micro-op types + +def N3Write_16c_16V0 : SchedWriteRes<[N3UnitV0]> { + let Latency = 16; + let NumMicroOps = 16; + let ReleaseAtCycles = [16]; +} + +def N3Write_3c_4L01_8I_4V : SchedWriteRes<[N3UnitL01, N3UnitI, N3UnitV]> { + let Latency = 3; + let NumMicroOps = 16; + let ReleaseAtCycles = [4, 8, 4]; +} + +//===----------------------------------------------------------------------===// +// Define generic 18 micro-op types + +def N3Write_9c_6L_4V_8I : SchedWriteRes<[N3Uni... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/106371 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits