https://github.com/SpencerAbson created https://github.com/llvm/llvm-project/pull/104602
The enablement of SVE/SME non-widening BFloat16 instructions was recently changed in response to an architecture update, in which: - FEAT_SVE_B16B16 was weakened - FEAT_SME_B16B16 was introduced New flags, 'sve-b16b16' and 'sme-b16b16', were introduced to replace the existing 'b16b16'. This was achieved in the two patches below. - https://github.com/llvm/llvm-project/pull/101480 - https://github.com/llvm/llvm-project/pull/102501 Ideally, the interface change introduced here will be valid in LLVM-19. We do not see it necessary to back-port the entire change, but just to add 'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged) 'b16b16' and 'sme2' flags which together cover all of these features. The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also fixed in this change. >From f04b2e8eea2f0d542cc6eea219dcf667cd4f3f7d Mon Sep 17 00:00:00 2001 From: Spencer Abson <spencer.ab...@arm.com> Date: Fri, 16 Aug 2024 14:39:43 +0000 Subject: [PATCH] [AArch64] Adopt updated B16B16 target flags The enablement of SVE/SME non-widening BFloat16 instructions was recently changed in response to an architecture update, in which: - FEAT_SVE_B16B16 was weakened - FEAT_SME_B16B16 was introduced New flags, 'sve-b16b16' and 'sme-b16b16', were introduced to replace the existing 'b16b16'. This was achieved in the two patches below. - https://github.com/llvm/llvm-project/pull/101480 - https://github.com/llvm/llvm-project/pull/102501 Ideally, the interface change introduced here will be valid in LLVM-19. We do not see it necessary to back-port the entire change, but just to add 'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged) 'b16b16' and 'sme2' flags which together cover all of these features. The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also fixed in this change. 
--- clang/include/clang/Basic/arm_sve.td | 26 +++++++++++++++---- .../print-supported-extensions-aarch64.c | 2 ++ llvm/lib/Target/AArch64/AArch64Features.td | 9 +++++++ .../TargetParser/TargetParserTest.cpp | 15 ++++++++++- 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 94c093d8911562..fb11d743fd6479 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2116,7 +2116,7 @@ def SVFCLAMP_BF : SInst<"svclamp[_{d}]", "dddd", "b", MergeNone, "aarch64_sve_ multiclass MinMaxIntr<string i, string zm, string mul, string t> { def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>; def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>; - def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "bhfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>; + def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>; } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { @@ -2134,11 +2134,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { } multiclass SInstMinMaxByVector<string name> { - def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>; - def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>; + def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>; + def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "hfd", 
MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>; - def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>; - def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>; + def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>; + def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>; } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { @@ -2172,9 +2172,25 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "hfd", MergeNone, "aarch64_sve_fclamp_single_x4", [IsStreaming], []>; } +multiclass BfSingleMultiVector<string name> { + def NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", "22d", "b", MergeNone, "aarch64_sve_f" # name # "_single_x2", [IsStreaming], []>; + def NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", "44d", "b", MergeNone, "aarch64_sve_f" # name # "_single_x4", [IsStreaming], []>; + + def NAME # _X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", MergeNone, "aarch64_sve_f" # name # "_x2", [IsStreaming], []>; + def NAME # _X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", MergeNone, "aarch64_sve_f" # name # "_x4", [IsStreaming], []>; +} + let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,b16b16"in { def SVBFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "b", MergeNone, "aarch64_sve_bfclamp_single_x2", [IsStreaming], []>; def SVBFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "b", MergeNone, "aarch64_sve_bfclamp_single_x4", [IsStreaming], []>; + + // bfmin, bfmax (single, multi) + defm SVBFMIN : BfSingleMultiVector<"min">; + defm SVBFMAX : BfSingleMultiVector<"max">; + + // bfminnm, bfmaxnm (single, multi) + defm 
SVBFMINNM : BfSingleMultiVector<"minnm">; + defm SVBFMAXNM : BfSingleMultiVector<"maxnm">; } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c index 6b969d50610f8b..cbd76a5062a594 100644 --- a/clang/test/Driver/print-supported-extensions-aarch64.c +++ b/clang/test/Driver/print-supported-extensions-aarch64.c @@ -55,6 +55,7 @@ // CHECK-NEXT: sha3 FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support // CHECK-NEXT: sm4 FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support // CHECK-NEXT: sme FEAT_SME Enable Scalable Matrix Extension (SME) +// CHECK-NEXT: sme-b16b16 FEAT_SME_B16B16 Enable SME2.1 ZA-targeting non-widening BFloat16 instructions // CHECK-NEXT: sme-f16f16 FEAT_SME_F16F16 Enable SME non-widening Float16 instructions // CHECK-NEXT: sme-f64f64 FEAT_SME_F64F64 Enable Scalable Matrix Extension (SME) F64F64 instructions // CHECK-NEXT: sme-f8f16 FEAT_SME_F8F16 Enable Scalable Matrix Extension (SME) F8F16 instructions @@ -71,6 +72,7 @@ // CHECK-NEXT: ssve-fp8dot4 FEAT_SSVE_FP8DOT4 Enable SVE2 FP8 4-way dot product instructions // CHECK-NEXT: ssve-fp8fma FEAT_SSVE_FP8FMA Enable SVE2 FP8 multiply-add instructions // CHECK-NEXT: sve FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: sve-b16b16 FEAT_SVE_B16B16 Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions // CHECK-NEXT: sve2 FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions // CHECK-NEXT: sve2-aes FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable AES SVE2 instructions // CHECK-NEXT: sve2-bitperm FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index a1ae0873fc1902..e8ce88a6bd64f0 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -438,6 +438,15 @@ def 
FeatureSVE2p1: ExtensionWithMArch<"sve2p1", "SVE2p1", "FEAT_SVE2p1", def FeatureB16B16 : ExtensionWithMArch<"b16b16", "B16B16", "FEAT_SVE_B16B16", "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions", [FeatureBF16]>; +// FeatureSVEB16B16 and FeatureSMEB16B16 act as aliases for {FeatureB16B16}, and +// {FeatureB16B16, FeatureSME2} respectively. This allows LLVM-20 interfacing programs +// that use '+sve-b16b16' and '+sme-b16b16' to compile in LLVM-19. +def FeatureSVEB16B16 : ExtensionWithMArch<"sve-b16b16", "SVEB16B16", "FEAT_SVE_B16B16", + "Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions", [FeatureB16B16]>; + +def FeatureSMEB16B16 : ExtensionWithMArch<"sme-b16b16", "SMEB16B16", "FEAT_SME_B16B16", + "Enable SME2.1 ZA-targeting non-widening BFloat16 instructions", [FeatureSME2, FeatureB16B16]>; + def FeatureSMEF16F16 : ExtensionWithMArch<"sme-f16f16", "SMEF16F16", "FEAT_SME_F16F16", "Enable SME non-widening Float16 instructions", [FeatureSME2]>; diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index 3d55b0309d26fd..9d08dd83684c90 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -2005,6 +2005,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { AArch64::AEK_CPA, AArch64::AEK_PAUTHLR, AArch64::AEK_TLBIW, AArch64::AEK_JSCVT, AArch64::AEK_FCMA, AArch64::AEK_FP8, + AArch64::AEK_SMEB16B16, AArch64::AEK_SVEB16B16, }; @@ -2043,6 +2044,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sha3")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2-bitperm")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2p1")); + EXPECT_TRUE(llvm::is_contained(Features, "+sve-b16b16")); EXPECT_TRUE(llvm::is_contained(Features, "+b16b16")); EXPECT_TRUE(llvm::is_contained(Features, "+rcpc")); EXPECT_TRUE(llvm::is_contained(Features, "+rand")); @@ -2063,6 
+2065,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+sme-f64f64")); EXPECT_TRUE(llvm::is_contained(Features, "+sme-i16i64")); EXPECT_TRUE(llvm::is_contained(Features, "+sme-f16f16")); + EXPECT_TRUE(llvm::is_contained(Features, "+sme-b16b16")); EXPECT_TRUE(llvm::is_contained(Features, "+sme2")); EXPECT_TRUE(llvm::is_contained(Features, "+sme2p1")); EXPECT_TRUE(llvm::is_contained(Features, "+hbc")); @@ -2188,6 +2191,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"lse", "nolse", "+lse", "-lse"}, {"rdm", "nordm", "+rdm", "-rdm"}, {"sve", "nosve", "+sve", "-sve"}, + {"sve-b16b16", "nosve-b16b16", "+sve-b16b16", "-sve-b16b16"}, {"sve2", "nosve2", "+sve2", "-sve2"}, {"sve2-aes", "nosve2-aes", "+sve2-aes", "-sve2-aes"}, {"sve2-sm4", "nosve2-sm4", "+sve2-sm4", "-sve2-sm4"}, @@ -2212,6 +2216,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"sme-f64f64", "nosme-f64f64", "+sme-f64f64", "-sme-f64f64"}, {"sme-i16i64", "nosme-i16i64", "+sme-i16i64", "-sme-i16i64"}, {"sme-f16f16", "nosme-f16f16", "+sme-f16f16", "-sme-f16f16"}, + {"sme-b16b16", "nosme-b16b16", "+sme-b16b16", "-sme-b16b16"}, {"sme2", "nosme2", "+sme2", "-sme2"}, {"sme2p1", "nosme2p1", "+sme2p1", "-sme2p1"}, {"hbc", "nohbc", "+hbc", "-hbc"}, @@ -2452,6 +2457,12 @@ AArch64ExtensionDependenciesBaseArchTestParams {AArch64::ARMV8A, {"nobf16", "b16b16"}, {"bf16", "b16b16"}, {}}, {AArch64::ARMV8A, {"b16b16", "nobf16"}, {}, {"bf16", "b16b16"}}, + // b16b16 -> {sve-b16b16, sme-b16b16} + {AArch64::ARMV8A, {"nob16b16", "sve-b16b16"}, {"b16b16", "sve-b16b16"}, {}}, + {AArch64::ARMV8A, {"sve-b16b16", "nob16b16"}, {}, {"sve-b16b16", "b16b16"}}, + {AArch64::ARMV8A, {"nob16b16", "sme-b16b16"}, {"b16b16", "sme-b16b16"}, {}}, + {AArch64::ARMV8A, {"sme-b16b16", "nob16b16"}, {}, {"b16b16", "sme-b16b16"}}, + // sve -> {sve2, f32mm, f64mm} {AArch64::ARMV8A, {"nosve", "sve2"}, {"sve", "sve2"}, {}}, {AArch64::ARMV8A, {"sve2", "nosve"}, {}, {"sve", "sve2"}}, @@ -2491,7 +2502,7 
@@ AArch64ExtensionDependenciesBaseArchTestParams {AArch64::ARMV8A, {"sme-fa64", "nosme"}, {}, {"sme", "sme-fa64"}}, // sme2 -> {sme2p1, ssve-fp8fma, ssve-fp8dot2, ssve-fp8dot4, sme-f8f16, - // sme-f8f32} + // sme-f8f32, sme-b16b16} {AArch64::ARMV8A, {"nosme2", "sme2p1"}, {"sme2", "sme2p1"}, {}}, {AArch64::ARMV8A, {"sme2p1", "nosme2"}, {}, {"sme2", "sme2p1"}}, {AArch64::ARMV8A, @@ -2522,6 +2533,8 @@ AArch64ExtensionDependenciesBaseArchTestParams {AArch64::ARMV8A, {"sme-f8f16", "nosme2"}, {}, {"sme2", "sme-f8f16"}}, {AArch64::ARMV8A, {"nosme2", "sme-f8f32"}, {"sme2", "sme-f8f32"}, {}}, {AArch64::ARMV8A, {"sme-f8f32", "nosme2"}, {}, {"sme2", "sme-f8f32"}}, + {AArch64::ARMV8A, {"nosme2", "sme-b16b16"}, {"sme2", "sme-b16b16"}, {}}, + {AArch64::ARMV8A, {"sme-b16b16", "nosme2"}, {}, {"sme2", "sme-b16b16"}}, // fp8 -> {sme-f8f16, sme-f8f32} {AArch64::ARMV8A, {"nofp8", "sme-f8f16"}, {"fp8", "sme-f8f16"}, {}}, _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits