https://github.com/jroelofs updated https://github.com/llvm/llvm-project/pull/95478
>From 1461be872bf26e2e0f2572f688a45af795421432 Mon Sep 17 00:00:00 2001 From: Jon Roelofs <jonathan_roel...@apple.com> Date: Thu, 13 Jun 2024 10:27:52 -0700 Subject: [PATCH 1/2] [llvm][AArch64] Support -mcpu=apple-m4 --- .../llvm/TargetParser/AArch64TargetParser.h | 9 +++++- llvm/lib/Target/AArch64/AArch64Processors.td | 31 +++++++++++++++++-- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 2 ++ .../TargetParser/TargetParserTest.cpp | 17 +++++++++- 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index df8e685eb6667..c1a68a0ec5c19 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -521,7 +521,14 @@ inline constexpr CpuInfo CpuInfos[] = { AArch64::ExtensionBitset({AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_SHA3, AArch64::AEK_FP16, AArch64::AEK_FP16FML})}, - + // Technically apple-m4 is ARMv9.2a, but a quirk of LLVM defines v9.0 as + // requiring SVE, which is optional according to the Arm ARM and not + // supported by the core. ARMv8.7a is the next closest choice. + {"apple-m4", ARMV8_7A, + AArch64::ExtensionBitset( + {AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_SHA3, + AArch64::AEK_FP16, AArch64::AEK_FP16FML, AArch64::AEK_SME, + AArch64::AEK_SME2, AArch64::AEK_SMEF64F64, AArch64::AEK_SMEI16I64})}, {"apple-s4", ARMV8_3A, AArch64::ExtensionBitset( {AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_FP16})}, diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index c04c20c78e8eb..57df6b85ab11d 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -398,6 +398,22 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17", FeatureZCRegMove, FeatureZCZeroing]>; +def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4", + "Apple M4", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseCryptoEOR, + FeatureFuseLiterals, + FeatureZCRegMove, + FeatureZCZeroing + ]>; + def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", "Samsung Exynos-M3 processors", [FeatureExynosCheapAsMoveHandling, @@ -784,6 +800,14 @@ def ProcessorFeatures { FeatureNEON, FeaturePerfMon, FeatureSHA3, FeatureFullFP16, FeatureFP16FML, FeatureHCX]; + // Technically apple-m4 is ARMv9.2. See the corresponding comment in + // AArch64TargetParser.h. + list<SubtargetFeature> AppleM4 = [HasV8_7aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureSHA3, + FeatureFullFP16, FeatureFP16FML, + FeatureAES, FeatureBF16, + FeatureSME2, + FeatureSMEF64F64, FeatureSMEI16I64]; list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, FeaturePerfMon]; list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, @@ -1010,6 +1034,9 @@ def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16, [TuneAppleA16]>; def : ProcessorModel<"apple-a17", CycloneModel, ProcessorFeatures.AppleA17, [TuneAppleA17]>; +def : ProcessorModel<"apple-m4", CycloneModel, ProcessorFeatures.AppleM4, + [TuneAppleM4]>; + // Mac CPUs def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14, [TuneAppleA14]>; @@ -1025,8 +1052,8 @@ def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12, [TuneAppleA12]>; // Alias for the latest Apple processor model supported by LLVM. -def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA16, - [TuneAppleA16]>; +def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleM4, + [TuneAppleM4]>; // Fujitsu A64FX def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX, diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 2920066cfdcff..1fad1d5ca6d7d 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -182,6 +182,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) { case AppleA15: case AppleA16: case AppleA17: + case AppleM4: CacheLineSize = 64; PrefetchDistance = 280; MinPrefetchStride = 2048; @@ -191,6 +192,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) { case AppleA15: case AppleA16: case AppleA17: + case AppleM4: MaxInterleaveFactor = 4; break; default: diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index 23555dfc68dc0..ccc101e907441 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -1644,6 +1644,21 @@ INSTANTIATE_TEST_SUITE_P( AArch64::AEK_I8MM, AArch64::AEK_JSCVT, AArch64::AEK_FCMA, AArch64::AEK_PAUTH}), "8.6-A"), + ARMCPUTestParams<AArch64::ExtensionBitset>( + "apple-m4", "armv8.7-a", "crypto-neon-fp-armv8", + AArch64::ExtensionBitset( + {AArch64::AEK_CRC, AArch64::AEK_AES, + AArch64::AEK_SHA2, AArch64::AEK_SHA3, + AArch64::AEK_FP, AArch64::AEK_SIMD, + AArch64::AEK_LSE, AArch64::AEK_RAS, + AArch64::AEK_RDM, AArch64::AEK_RCPC, + AArch64::AEK_DOTPROD, AArch64::AEK_FP16, + AArch64::AEK_FP16FML, AArch64::AEK_BF16, + AArch64::AEK_I8MM, AArch64::AEK_JSCVT, + AArch64::AEK_FCMA, AArch64::AEK_PAUTH, + AArch64::AEK_SME, AArch64::AEK_SME2, + AArch64::AEK_SMEF64F64, AArch64::AEK_SMEI16I64}), + "8.7-A"), ARMCPUTestParams<AArch64::ExtensionBitset>( "apple-s4", "armv8.3-a", "crypto-neon-fp-armv8", AArch64::ExtensionBitset( @@ -1872,7 +1887,7 @@ INSTANTIATE_TEST_SUITE_P( ARMCPUTestParams<AArch64::ExtensionBitset>::PrintToStringParamName); // Note: number of CPUs includes aliases. -static constexpr unsigned NumAArch64CPUArchs = 79; +static constexpr unsigned NumAArch64CPUArchs = 80; TEST(TargetParserTest, testAArch64CPUArchList) { SmallVector<StringRef, NumAArch64CPUArchs> List; >From 4a6508c77500db03587692aac3c8330d8f18000c Mon Sep 17 00:00:00 2001 From: Jon Roelofs <jonathan_roel...@apple.com> Date: Thu, 13 Jun 2024 17:57:28 -0700 Subject: [PATCH 2/2] fixup clang test --- clang/test/Misc/target-invalid-cpu-note.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index 5362c6f882c25..ffc57db825bbc 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -5,11 +5,11 @@ // RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64 // AARCH64: error: unknown target CPU 'not-a-cpu' -// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-a725, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, cortex-x925, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}} +// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-a725, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, cortex-x925, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-m4, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}} // RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64 // TUNE_AARCH64: error: unknown target CPU 'not-a-cpu' -// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-a725, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, cortex-x925, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}} +// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-a725, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, cortex-x925, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-m4, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}} // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 // X86: error: unknown target CPU 'not-a-cpu' _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits